001/* 002 * Licensed to DuraSpace under one or more contributor license agreements. 003 * See the NOTICE file distributed with this work for additional information 004 * regarding copyright ownership. 005 * 006 * DuraSpace licenses this file to you under the Apache License, 007 * Version 2.0 (the "License"); you may not use this file except in 008 * compliance with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.fcrepo.kernel.modeshape; 019 020import com.codahale.metrics.Counter; 021import com.codahale.metrics.Histogram; 022import com.codahale.metrics.Timer; 023import org.apache.jena.rdf.model.Resource; 024import org.fcrepo.kernel.api.exception.InvalidChecksumException; 025import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException; 026import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 027import org.fcrepo.kernel.api.identifiers.IdentifierConverter; 028import org.fcrepo.kernel.api.models.NonRdfSourceDescription; 029import org.fcrepo.kernel.api.models.FedoraBinary; 030import org.fcrepo.kernel.api.models.FedoraResource; 031import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint; 032import org.fcrepo.kernel.api.RdfStream; 033import org.fcrepo.kernel.api.utils.CacheEntry; 034import org.fcrepo.kernel.api.utils.ContentDigest; 035import org.fcrepo.kernel.api.utils.FixityResult; 036import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext; 037import org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils; 038import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory; 039import org.fcrepo.metrics.RegistryService; 040import org.modeshape.jcr.api.Binary; 041import org.modeshape.jcr.api.ValueFactory; 042import org.slf4j.Logger; 043 044import javax.jcr.Node; 045import javax.jcr.PathNotFoundException; 046import javax.jcr.Property; 047import javax.jcr.RepositoryException; 048import javax.jcr.Value; 049import java.io.InputStream; 050import java.net.URI; 051import java.util.Collection; 052import java.util.HashMap; 053import java.util.HashSet; 054import java.util.Map; 055import java.util.Optional; 056import java.util.stream.Collectors; 057 058import static com.codahale.metrics.MetricRegistry.name; 059import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring; 060import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1; 061import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER; 062import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values; 063import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary; 064import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT; 065import static org.modeshape.jcr.api.JcrConstants.JCR_DATA; 066import static org.slf4j.LoggerFactory.getLogger; 067 068/** 069 * @author cabeer 070 * @since 9/19/14 071 */ 072public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary { 073 074 private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class); 075 076 077 static final RegistryService registryService = RegistryService.getInstance(); 078 static final Counter fixityCheckCounter 079 = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter")); 080 081 static final Timer timer = registryService.getMetrics().timer( 082 name(NonRdfSourceDescription.class, "fixity-check-time")); 083 084 static final Histogram contentSizeHistogram = 085 registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size")); 086 087 /** 088 * Wrap an existing Node as a Fedora Binary 089 * @param node the node 090 */ 091 public FedoraBinaryImpl(final Node node) { 092 super(node); 093 094 if (node.isNew()) { 095 initializeNewBinaryProperties(); 096 } 097 } 098 099 private void initializeNewBinaryProperties() { 100 try { 101 decorateContentNode(node, new HashSet<>()); 102 } catch (final RepositoryException e) { 103 LOGGER.warn("Count not decorate {} with FedoraBinary properties: {}", node, e); 104 } 105 } 106 107 @Override 108 public FedoraResource getDescription() { 109 try { 110 return new NonRdfSourceDescriptionImpl(getNode().getParent()); 111 } catch (final RepositoryException e) { 112 throw new RepositoryRuntimeException(e); 113 } 114 } 115 116 /* 117 * (non-Javadoc) 118 * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent() 119 */ 120 @Override 121 public InputStream getContent() { 122 try { 123 return getBinaryContent().getStream(); 124 } catch (final RepositoryException e) { 125 throw new RepositoryRuntimeException(e); 126 } 127 } 128 129 /** 130 * Retrieve the JCR Binary object 131 * @return a JCR-wrapped Binary object 132 */ 133 private javax.jcr.Binary getBinaryContent() { 134 try { 135 return getProperty(JCR_DATA).getBinary(); 136 } catch (final PathNotFoundException e) { 137 throw new PathNotFoundRuntimeException(e); 138 } catch (final RepositoryException e) { 139 throw new RepositoryRuntimeException(e); 140 } 141 } 142 143 /* 144 * (non-Javadoc) 145 * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream, 146 * java.lang.String, java.net.URI, java.lang.String, 147 * org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint) 148 */ 149 @Override 150 public void setContent(final InputStream content, final String contentType, 151 final Collection<URI> checksums, final String originalFileName, 152 final StoragePolicyDecisionPoint storagePolicyDecisionPoint) 153 throws InvalidChecksumException { 154 155 try { 156 final Node contentNode = getNode(); 157 158 if (contentNode.canAddMixin(FEDORA_BINARY)) { 159 contentNode.addMixin(FEDORA_BINARY); 160 } 161 162 if (contentType != null) { 163 contentNode.setProperty(HAS_MIME_TYPE, contentType); 164 } 165 166 if (originalFileName != null) { 167 contentNode.setProperty(FILENAME, originalFileName); 168 } 169 170 LOGGER.debug("Created content node at path: {}", contentNode.getPath()); 171 172 String hint = null; 173 174 if (storagePolicyDecisionPoint != null) { 175 hint = storagePolicyDecisionPoint.evaluatePolicies(this); 176 } 177 final ValueFactory modevf = 178 (ValueFactory) node.getSession().getValueFactory(); 179 final Binary binary = modevf.createBinary(content, hint); 180 181 /* 182 * This next line of code deserves explanation. If we chose for the 183 * simpler line: Property dataProperty = 184 * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR 185 * would not block on the stream's completion, and we would return to 186 * the requester before the mutation to the repo had actually completed. 187 * So instead we use createBinary(requestBodyStream), because its 188 * contract specifies: "The passed InputStream is closed before this 189 * method returns either normally or because of an exception." which 190 * lets us block and not return until the job is done! The simpler code 191 * may still be useful to us for an asynchronous method that we develop 192 * later. 193 */ 194 final Property dataProperty = contentNode.setProperty(JCR_DATA, binary); 195 196 // Ensure provided checksums are valid 197 final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums; 198 verifyChecksums(nonNullChecksums, dataProperty); 199 200 decorateContentNode(contentNode, nonNullChecksums); 201 FedoraTypesUtils.touch(getNode()); 202 FedoraTypesUtils.touch(((FedoraResourceImpl) getDescription()).getNode()); 203 204 LOGGER.debug("Created data property at path: {}", dataProperty.getPath()); 205 206 } catch (final RepositoryException e) { 207 throw new RepositoryRuntimeException(e); 208 } 209 } 210 211 /** 212 * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty. 213 * If one or more of the checksums are invalid, an InvalidChecksumException is thrown. 214 * 215 * @param checksums that the user provided 216 * @param dataProperty containing the binary against which the checksums will be verified 217 * @throws InvalidChecksumException 218 * @throws RepositoryException 219 */ 220 private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty) 221 throws InvalidChecksumException, RepositoryException { 222 223 final Map<URI, URI> checksumErrors = new HashMap<>(); 224 225 // Loop through provided checksums validating against computed values 226 checksums.forEach(checksum -> { 227 final String algorithm = ContentDigest.getAlgorithm(checksum); 228 try { 229 // The case internally supported by ModeShape 230 if (algorithm.equals(SHA1.algorithm)) { 231 final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash(); 232 final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1); 233 234 if (!dsSHA1Uri.equals(checksum)) { 235 LOGGER.debug("Failed checksum test"); 236 checksumErrors.put(checksum, dsSHA1Uri); 237 } 238 239 // The case that requires re-computing the checksum 240 } else { 241 final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty); 242 cacheEntry.checkFixity(algorithm).stream().findFirst().ifPresent( 243 fixityResult -> { 244 if (!fixityResult.matches(checksum)) { 245 LOGGER.debug("Failed checksum test"); 246 checksumErrors.put(checksum, fixityResult.getComputedChecksum()); 247 } 248 } 249 ); 250 } 251 } catch (RepositoryException e) { 252 throw new RepositoryRuntimeException(e); 253 } 254 }); 255 256 // Throw an exception if any checksum errors occurred 257 if (!checksumErrors.isEmpty()) { 258 final String template = "Checksum Mismatch of %1$s and %2$s\n"; 259 final StringBuilder error = new StringBuilder(); 260 checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value))); 261 throw new InvalidChecksumException(error.toString()); 262 } 263 264 } 265 266 /* 267 * (non-Javadoc) 268 * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize() 269 */ 270 @Override 271 public long getContentSize() { 272 try { 273 if (hasProperty(CONTENT_SIZE)) { 274 return getProperty(CONTENT_SIZE).getLong(); 275 } 276 } catch (final RepositoryException e) { 277 LOGGER.info("Could not get contentSize(): {}", e.getMessage()); 278 } 279 280 return -1L; 281 } 282 283 /* 284 * (non-Javadoc) 285 * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest() 286 */ 287 @Override 288 public URI getContentDigest() { 289 try { 290 // Determine which digest algorithm to use 291 final String algorithm = hasProperty(DEFAULT_DIGEST_ALGORITHM) ? 292 property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst().get().getString() : 293 ContentDigest.DEFAULT_ALGORITHM; 294 final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), ""); 295 296 if (hasProperty(CONTENT_DIGEST)) { 297 // Select the stored digest that matches the digest algorithm 298 Optional<Value> digestValue = property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> { 299 try { 300 final URI digestUri = URI.create(digest.getString()); 301 return algorithmWithoutStringType.equalsIgnoreCase(ContentDigest.getAlgorithm(digestUri)); 302 303 } catch (RepositoryException e) { 304 LOGGER.warn("Exception thrown when getting digest property {}, {}", digest, e.getMessage()); 305 return false; 306 } 307 }).findFirst(); 308 309 // Success, return the digest value 310 if (digestValue.isPresent()) { 311 return URI.create(digestValue.get().getString()); 312 } 313 } 314 LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType); 315 } catch (final RepositoryException e) { 316 LOGGER.warn("Could not get content digest: {}", e.getMessage()); 317 } 318 319 return ContentDigest.missingChecksum(); 320 } 321 322 /* 323 * (non-Javadoc) 324 * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType() 325 */ 326 @Override 327 public String getMimeType() { 328 try { 329 if (hasProperty(HAS_MIME_TYPE)) { 330 return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), ""); 331 } 332 return "application/octet-stream"; 333 } catch (final RepositoryException e) { 334 throw new RepositoryRuntimeException(e); 335 } 336 } 337 338 /* 339 * (non-Javadoc) 340 * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename() 341 */ 342 @Override 343 public String getFilename() { 344 try { 345 if (hasProperty(FILENAME)) { 346 return getProperty(FILENAME).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), ""); 347 } 348 return node.getParent().getName(); 349 } catch (final RepositoryException e) { 350 throw new RepositoryRuntimeException(e); 351 } 352 } 353 354 @Override 355 public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) { 356 return getFixity(idTranslator, getContentDigest(), getContentSize()); 357 } 358 359 @Override 360 public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator, 361 final URI digestUri, 362 final long size) { 363 364 fixityCheckCounter.inc(); 365 366 try (final Timer.Context context = timer.time()) { 367 368 LOGGER.debug("Checking resource: " + getPath()); 369 370 final String algorithm = ContentDigest.getAlgorithm(digestUri); 371 372 final long contentSize = size < 0 ? getBinaryContent().getSize() : size; 373 374 final Collection<FixityResult> fixityResults 375 = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm); 376 377 return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize); 378 } catch (final RepositoryException e) { 379 throw new RepositoryRuntimeException(e); 380 } 381 } 382 383 /** 384 * When deleting the binary, we also need to clean up the description document. 385 */ 386 @Override 387 public void delete() { 388 final FedoraResource description = getDescription(); 389 390 super.delete(); 391 392 description.delete(); 393 } 394 395 @Override 396 public FedoraResource getBaseVersion() { 397 return getDescription().getBaseVersion(); 398 } 399 400 private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums) 401 throws RepositoryException { 402 if (contentNode == null) { 403 LOGGER.warn("{} node appears to be null!", JCR_CONTENT); 404 return; 405 } 406 if (contentNode.canAddMixin(FEDORA_BINARY)) { 407 contentNode.addMixin(FEDORA_BINARY); 408 } 409 410 if (contentNode.hasProperty(JCR_DATA)) { 411 final Property dataProperty = contentNode.getProperty(JCR_DATA); 412 final Binary binary = (Binary) dataProperty.getBinary(); 413 final String dsChecksum = binary.getHexHash(); 414 415 contentSizeHistogram.update(dataProperty.getLength()); 416 417 checksums.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum)); 418 419 final String[] checksumArray = new String[checksums.size()]; 420 checksums.stream().map(Object::toString).collect(Collectors.toSet()).toArray(checksumArray); 421 422 contentNode.setProperty(CONTENT_DIGEST, checksumArray); 423 contentNode.setProperty(CONTENT_SIZE, dataProperty.getLength()); 424 425 LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath()); 426 } 427 } 428 429 @Override 430 public boolean isVersioned() { 431 return getDescription().isVersioned(); 432 } 433 434 @Override 435 public void enableVersioning() { 436 super.enableVersioning(); 437 getDescription().enableVersioning(); 438 } 439 440 @Override 441 public void disableVersioning() { 442 super.disableVersioning(); 443 getDescription().disableVersioning(); 444 } 445 446 /** 447 * Check if the given node is a Fedora binary 448 * @param node the given node 449 * @return whether the given node is a Fedora binary 450 */ 451 public static boolean hasMixin(final Node node) { 452 return isFedoraBinary.test(node); 453 } 454}