001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.kernel.modeshape;
019
020//import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
021import static org.slf4j.LoggerFactory.getLogger;
022import static org.fcrepo.kernel.api.FedoraExternalContent.PROXY;
023import static org.fcrepo.kernel.api.FedoraExternalContent.REDIRECT;
024
025import java.io.IOException;
026import java.io.InputStream;
027import java.net.HttpURLConnection;
028import java.net.URI;
029import java.net.URISyntaxException;
030import java.util.Collection;
031import java.util.HashMap;
032import java.util.HashSet;
033import java.util.Map;
034import java.util.stream.Collectors;
035
036import javax.jcr.Node;
037import javax.jcr.Property;
038import javax.jcr.RepositoryException;
039import org.apache.jena.rdf.model.Resource;
040import org.fcrepo.kernel.api.RdfStream;
041import org.fcrepo.kernel.api.exception.ExternalContentAccessException;
042import org.fcrepo.kernel.api.exception.InvalidChecksumException;
043import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
044import org.fcrepo.kernel.api.exception.UnsupportedAlgorithmException;
045import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
046import org.fcrepo.kernel.api.models.FedoraResource;
047import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
048import org.fcrepo.kernel.api.utils.CacheEntry;
049import org.fcrepo.kernel.api.utils.ContentDigest;
050import org.fcrepo.kernel.api.utils.FixityResult;
051import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
052import org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils;
053import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
054import org.slf4j.Logger;
055
056/**
057 * External binary from a url
058 *
059 * @author bbpennel
060 * @since 12/14/2017
061 */
062public class UrlBinary extends AbstractFedoraBinary {
063    private static final Logger LOGGER = getLogger(UrlBinary.class);
064
065    /**
066     * Construct UrlBinary
067     *
068     * @param node node
069     */
070    public UrlBinary(final Node node) {
071        super(node);
072    }
073
074    /*
075     * (non-Javadoc)
076     * @see org.fcrepo.kernel.modeshape.FedoraBinaryImpl#getContent()
077     */
078    @Override
079    public InputStream getContent() {
080        // todo - this needs to be more complete so the proxy information will
081        // make it up to the higher levels. Ie, so one can pass back the response information
082        try {
083            return getResourceUri().toURL().openStream();
084        } catch (final IOException e) {
085            throw new ExternalContentAccessException("Problems getting external content : " + e.getMessage(), e);
086        }
087    }
088
089    protected long getRemoteContentSize() {
090        final URI resourceUri = getResourceUri();
091        try {
092            final HttpURLConnection httpConn = (HttpURLConnection) resourceUri.toURL().openConnection();
093            httpConn.setRequestMethod("HEAD");
094            httpConn.connect();
095
096            final int status = httpConn.getResponseCode();
097
098            if (status == HttpURLConnection.HTTP_OK) {
099                final String contentLength = httpConn.getHeaderField("Content-Length");
100                if (contentLength != null) {
101                    try {
102                        return Long.parseLong(contentLength);
103                    } catch (final NumberFormatException e) {
104                        LOGGER.warn("Unable to parse Content-Length of remote file {}", resourceUri, e);
105                    }
106                }
107            }
108
109        } catch (final IOException e) {
110            LOGGER.warn("Error getting content size for '{}' : '{}'", getPath(), resourceUri, e);
111        }
112        return -1L;
113    }
114
115    @Override
116    public void setExternalContent(final String contentType,
117                           final Collection<URI> checksums, final String originalFileName,
118                           final String externalHandling, final String externalUrl)
119            throws InvalidChecksumException {
120
121        // set a few things on the description node, then set a few more in the other setContent() function
122        final Node descNode = getDescriptionNodeOrNull();
123        final Node contentNode = getNode();
124        try {
125            if (externalHandling.equals(PROXY)) {
126                contentNode.setProperty(PROXY_FOR, externalUrl);
127            } else if (externalHandling.equals(REDIRECT)) {
128                contentNode.setProperty(REDIRECTS_TO, externalUrl);
129            } else {
130                throw new RepositoryException("Unknown external content handling type: " + externalHandling);
131            }
132
133            if (contentNode.canAddMixin(FEDORA_BINARY)) {
134                contentNode.addMixin(FEDORA_BINARY);
135            }
136
137            LOGGER.debug("Created content node at path: {}", contentNode.getPath());
138
139            // Ensure provided checksums are valid
140            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
141            verifyChecksums(nonNullChecksums);
142
143            // Store checksums on node
144            final String[] checksumArray = new String[nonNullChecksums.size()];
145            nonNullChecksums.stream().map(Object::toString).collect(Collectors.toSet()).toArray(checksumArray);
146
147            FedoraTypesUtils.touch(contentNode);
148
149            if (descNode != null) {
150                descNode.setProperty(CONTENT_DIGEST, checksumArray);
151                if (contentType != null) {
152                    descNode.setProperty(HAS_MIME_TYPE, contentType);
153                } else {
154                    descNode.setProperty(HAS_MIME_TYPE, DEFAULT_MIME_TYPE);
155                }
156
157                if (originalFileName != null) {
158                    descNode.setProperty(FILENAME, originalFileName);
159                }
160                setContentSize(getRemoteContentSize());
161
162                FedoraTypesUtils.touch(descNode);
163            }
164
165            LOGGER.debug("Set url binary content from path: {}", getResourceLocation());
166
167        } catch (final RepositoryException e) {
168            throw new RepositoryRuntimeException(e);
169        }
170    }
171    /*
172     * (non-Javadoc)
173     * @see org.fcrepo.kernel.modeshape.FedoraBinaryImpl#setContent(java.io.InputStream, java.lang.String,
174     * java.util.Collection, java.lang.String, org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
175     */
176    @Override
177    public void setContent(final InputStream content, final String contentType,
178            final Collection<URI> checksums, final String originalFileName,
179            final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
180            throws UnsupportedOperationException {
181        throw new UnsupportedOperationException(
182                "Cannot call setContent() on external binary. Call setExternalContent() instead.");
183    }
184
185
186    private void verifyChecksums(final Collection<URI> checksums)
187            throws InvalidChecksumException, RepositoryException {
188
189        Property property = null;
190        if (isProxy()) {
191            property = getProperty(PROXY_FOR);
192        } else if (isRedirect()) {
193            property = getProperty(REDIRECTS_TO);
194        } // what else could it be?
195
196        final Map<URI, URI> checksumErrors = new HashMap<>();
197
198        final CacheEntry cacheEntry = CacheEntryFactory.forProperty(property);
199        // Loop through provided checksums validating against computed values
200        for (final URI check : checksums) {
201            final String algorithm = ContentDigest.getAlgorithm(check);
202            for (final FixityResult result : cacheEntry.checkFixity(algorithm)) {
203                if (!result.matches(check)) {
204                    LOGGER.debug("Failed checksum test");
205                    checksumErrors.put(check, result.getComputedChecksum());
206                }
207            }
208        }
209
210        // Throw an exception if any checksum errors occurred
211        if (!checksumErrors.isEmpty()) {
212            final String template = "Checksum Mismatch of %1$s and %2$s\n";
213            final StringBuilder error = new StringBuilder();
214            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
215            throw new InvalidChecksumException(error.toString());
216        }
217
218    }
219
220    /*
221     * (non-Javadoc)
222     * @see
223     * org.fcrepo.kernel.modeshape.FedoraBinaryImpl#getFixity(org.fcrepo.kernel.api.identifiers.IdentifierConverter)
224     */
225    @Override
226    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
227        return getFixity(idTranslator, getContentDigest(), getContentSize());
228    }
229
230    /*
231     * (non-Javadoc)
232     * @see
233     * org.fcrepo.kernel.modeshape.FedoraBinaryImpl#getFixity(org.fcrepo.kernel.api.identifiers.IdentifierConverter,
234     * java.net.URI, long)
235     */
236    @Override
237    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator, final URI digestUri,
238            final long size) {
239
240        try {
241
242            LOGGER.debug("Checking resource: " + getPath());
243
244            final String algorithm = ContentDigest.getAlgorithm(digestUri);
245
246            final long contentSize = size < 0 ? getContentSize() : size;
247
248            Collection<FixityResult> fixityResults = null;
249            if (isProxy()) {
250                fixityResults = CacheEntryFactory.forProperty(getProperty(PROXY_FOR)).checkFixity(algorithm);
251            } else if (isRedirect()) {
252                fixityResults =
253                    CacheEntryFactory.forProperty(getProperty(REDIRECTS_TO)).checkFixity(algorithm);
254            } else {
255                LOGGER.warn("URL Binary -- not proxy or redirect, so what is it?");
256                throw new RepositoryException("Binary resource marked as external is not proxy or redirect.");
257            }
258            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
259        } catch (final RepositoryException e) {
260            throw new RepositoryRuntimeException(e);
261        }
262    }
263
264    @Override
265    public Collection<URI> checkFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
266            final Collection<String> algorithms)
267            throws UnsupportedAlgorithmException {
268
269        try {
270
271            Collection<URI> list = null;
272            if (isProxy()) {
273                list = CacheEntryFactory.forProperty(getProperty(PROXY_FOR)).checkFixity(algorithms);
274            } else if (isRedirect()) {
275                list = CacheEntryFactory.forProperty(getProperty(REDIRECTS_TO)).checkFixity(algorithms);
276            } else {
277                throw new RepositoryRuntimeException("External content error: not proxy or redirect");
278            }
279            return list;
280        } catch (final RepositoryException e) {
281            throw new RepositoryRuntimeException(e);
282        }
283
284    }
285
286    /**
287     * Returns the specified mimetype in place of the original external-body if provided
288     */
289    @Override
290    public String getMimeType() {
291        return getMimeTypeValue();
292    }
293
294    /**
295     * Gets the URL for where this resource is actually located
296     * @return String containing actual location of resource
297     */
298    private String getResourceLocation() {
299        if (isProxy()) {
300            return getProxyURL();
301        } else {
302            return getRedirectURL();
303        }
304    }
305
306    /**
307     * Get a URI for the resource
308     * @return URI object representing resource's location
309     */
310    protected URI getResourceUri() {
311        try {
312            return new URI(getResourceLocation());
313        } catch (final URISyntaxException e) {
314            throw new RepositoryRuntimeException(e);
315        }
316    }
317}