001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.http.api;
019
020import static javax.ws.rs.core.MediaType.APPLICATION_OCTET_STREAM_TYPE;
021import static org.apache.http.HttpHeaders.CONTENT_LENGTH;
022import static org.apache.http.HttpHeaders.CONTENT_TYPE;
023import static org.apache.http.HttpStatus.SC_OK;
024import static org.slf4j.LoggerFactory.getLogger;
025
026import org.apache.http.Header;
027import org.apache.http.client.methods.HttpHead;
028import org.apache.http.client.methods.CloseableHttpResponse;
029import org.apache.http.impl.client.CloseableHttpClient;
030import org.apache.http.impl.client.HttpClients;
031import org.fcrepo.kernel.api.exception.ExternalContentAccessException;
032import org.fcrepo.kernel.api.exception.ExternalMessageBodyException;
033import javax.ws.rs.core.Link;
034
035import java.io.FileInputStream;
036import java.io.IOException;
037import java.io.InputStream;
038import java.net.URI;
039import java.nio.file.Files;
040import java.nio.file.Path;
041import java.nio.file.Paths;
042import java.util.Map;
043
044import org.fcrepo.kernel.api.models.ExternalContent;
045import org.slf4j.Logger;
046
047/**
048 * This class is a helper for dealing with the External Content Link header and External Content itself, in the case
049 * of handling="copy". This class will verify that an External Content Link header is formatted correctly and
050 * help parse it, delivering parts of it when asked.
051 *
052 * @author bseeger
053 * @since 5/7/2018
054 */
055public class ExternalContentHandler implements ExternalContent {
056
057    private static final Logger LOGGER = getLogger(FedoraLdp.class);
058
059    private final static String HANDLING = "handling";
060    private final static String EXT_CONTENT_TYPE = "type";
061
062    private final Link link;
063    private final String handling;
064    private String contentType;
065    private Long contentSize;
066
067    /* link header for external content should look like this:
068          Link: <http://example.org/some/content>;
069          rel="http://fedora.info/definitions/fcrepo#ExternalContent";
070          handling="proxy";
071          type="image/tiff"
072    */
073
074    /**
075     *  Construct an ExternalContentHandler (helper)
076     *
077     *  @param linkHeader actual link header from request
078     */
079    protected ExternalContentHandler(final String linkHeader) {
080        // if it parses, then we're mostly good to go.
081        link = parseLinkHeader(linkHeader);
082
083        final Map<String, String> map = link.getParams();
084        // handling will be in the map, where as content type may not be
085        handling = map.get(HANDLING).toLowerCase();
086        // Retrieve details directly from the content
087        retrieveContentDetails();
088        final var type = map.get(EXT_CONTENT_TYPE) != null ? map.get(EXT_CONTENT_TYPE).toLowerCase() : null;
089        if (type != null) {
090            contentType = type;
091        } else if (contentType == null) {
092            LOGGER.debug("Defaulting to octet stream for media type");
093            contentType = APPLICATION_OCTET_STREAM_TYPE.toString();
094        }
095
096        if (contentSize == null) {
097            contentSize = -1L;
098        }
099    }
100
101    @Override
102    public String getContentType() {
103        return contentType;
104    }
105
106    @Override
107    public long getContentSize() {
108        return contentSize;
109    }
110
111    @Override
112    public String getHandling() {
113        return handling;
114    }
115
116    @Override
117    public String getURL() {
118        return link.getUri().toString();
119    }
120
121    @Override
122    public URI getURI() {
123        return link.getUri();
124    }
125
126    @Override
127    public boolean isCopy() {
128        return COPY.equals(handling);
129    }
130
131    @Override
132    public boolean isRedirect() {
133        return REDIRECT.equals(handling);
134    }
135
136    @Override
137    public boolean isProxy() {
138        return PROXY.equals(handling);
139    }
140
141    @Override
142    public InputStream fetchExternalContent() {
143
144        final URI uri = link.getUri();
145        final String scheme = uri.getScheme();
146        LOGGER.debug("scheme is {}", scheme);
147        if (scheme != null) {
148            try {
149                if (scheme.equals("file")) {
150                    return new FileInputStream(uri.getPath());
151                } else if (scheme.equals("http") || scheme.equals("https")) {
152                    return uri.toURL().openStream();
153                }
154            } catch (final IOException e) {
155                throw new ExternalContentAccessException("Failed to read external content from " + uri, e);
156            }
157        }
158        return null;
159    }
160
161    /**
162     * Validate that an external content link header is appropriately formatted
163     * @param link to be validated
164     * @return Link object if the header is formatted correctly, else null
165     * @throws ExternalMessageBodyException on error
166     */
167    private Link parseLinkHeader(final String link) throws ExternalMessageBodyException {
168        final Link realLink;
169
170        try {
171            realLink = Link.valueOf(link);
172            final String handling = realLink.getParams().get(HANDLING);
173            if (handling == null || !handling.matches("(?i)" + PROXY + "|" + COPY + "|" + REDIRECT)) {
174                // error
175                throw new ExternalMessageBodyException(
176                        "Link header formatted incorrectly: 'handling' parameter incorrect or missing");
177            }
178        } catch (final Exception e) {
179            throw new ExternalMessageBodyException("External content link header url is malformed");
180        }
181        return realLink;
182    }
183
184    private void retrieveContentDetails() {
185        final URI uri = getURI();
186        final String scheme = uri.getScheme().toLowerCase();
187
188        if ("file".equals(scheme)) {
189            final Path path = Paths.get(uri);
190            try {
191                contentSize = Files.size(path);
192            } catch (final IOException e) {
193                throw new ExternalMessageBodyException("Unable to access external binary at URI " + uri, e);
194            }
195        } else if ("http".equals(scheme) || "https".equals(scheme)) {
196            try (final CloseableHttpClient httpClient = HttpClients.createDefault()) {
197                final HttpHead httpHead = new HttpHead(uri);
198                httpHead.setHeader("Accept-Encoding", "identity");
199                try (final CloseableHttpResponse response = httpClient.execute(httpHead)) {
200                    if (response.getStatusLine().getStatusCode() != SC_OK) {
201                        throw new ExternalMessageBodyException("Unable to access external binary at URI " + uri
202                                + " received response " + response.getStatusLine().getStatusCode());
203                    }
204
205                    final Header typeHeader = response.getFirstHeader(CONTENT_TYPE);
206                    if (typeHeader != null) {
207                        contentType = typeHeader.getValue();
208                    }
209                    final Header sizeHeader = response.getFirstHeader(CONTENT_LENGTH);
210                    if (sizeHeader != null) {
211                        contentSize = Long.parseLong(sizeHeader.getValue());
212                    }
213                }
214            } catch (final IOException e) {
215                throw new ExternalMessageBodyException("Unable to access external binary at URI " + uri, e);
216            }
217        }
218    }
219}