001/* 002 * Licensed to DuraSpace under one or more contributor license agreements. 003 * See the NOTICE file distributed with this work for additional information 004 * regarding copyright ownership. 005 * 006 * DuraSpace licenses this file to you under the Apache License, 007 * Version 2.0 (the "License"); you may not use this file except in 008 * compliance with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.fcrepo.connector.file; 019 020import static java.lang.System.currentTimeMillis; 021import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_DIGEST; 022import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_SIZE; 023import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_BINARY; 024import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_CONTAINER; 025import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION; 026import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_RESOURCE; 027import static org.fcrepo.kernel.api.utils.ContentDigest.asURI; 028import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_CREATED; 029import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_LASTMODIFIED; 030import static org.modeshape.jcr.api.JcrConstants.JCR_DATA; 031import static org.modeshape.jcr.api.JcrConstants.NT_FILE; 032import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER; 033import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE; 034 035import java.io.File; 036import java.net.URI; 037import java.util.Date; 038import java.util.HashMap; 039import java.io.IOException; 040import java.util.Map; 041 042import com.google.common.annotations.VisibleForTesting; 043 044import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; 045import org.infinispan.schematic.document.Document; 046import org.modeshape.connector.filesystem.FileSystemConnector; 047import org.modeshape.jcr.api.value.DateTime; 048import org.modeshape.jcr.api.nodetype.NodeTypeManager; 049import org.modeshape.jcr.spi.federation.DocumentChanges; 050import org.modeshape.jcr.spi.federation.DocumentReader; 051import org.modeshape.jcr.spi.federation.DocumentWriter; 052import org.modeshape.jcr.value.BinaryValue; 053import org.modeshape.jcr.value.Name; 054import org.modeshape.jcr.value.Property; 055import org.modeshape.jcr.value.basic.BasicSingleValueProperty; 056import org.slf4j.Logger; 057import org.slf4j.LoggerFactory; 058 059import javax.jcr.NamespaceRegistry; 060import javax.jcr.RepositoryException; 061 062/** 063 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and 064 * content properties. 065 * 066 * @author Andrew Woods 067 * Date: 1/30/14 068 */ 069public class FedoraFileSystemConnector extends FileSystemConnector { 070 071 private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class); 072 073 private static final String DELIMITER = "/"; 074 private static final String JCR_CONTENT = "jcr:content"; 075 private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT; 076 077 /** 078 * The string path for a {@link File} object that represents the top-level directory in which properties are 079 * stored. This is optional for this connector, but if set allows properties to be cached (greatly 080 * improving performance) for even read-only connectors. When this property is specified the extraPropertiesStore 081 * should be null (not specified) as it would be overridden by this. 082 */ 083 private String propertiesDirectoryPath; 084 private File propertiesDirectory; 085 086 @Override 087 public void initialize(final NamespaceRegistry registry, 088 final NodeTypeManager nodeTypeManager) throws IOException { 089 LOGGER.warn("FedoraFileSystemConnector will be removed from the core of Fedora in a coming release. " 090 + "See https://jira.duraspace.org/browse/FCREPO-2028 for more information."); 091 try { 092 super.initialize(registry, nodeTypeManager); 093 } catch (final RepositoryException e) { 094 throw new RepositoryRuntimeException("Error initializing FedoraFileSystemConnector!", e); 095 } 096 097 if (propertiesDirectoryPath != null) { 098 propertiesDirectory = new File(propertiesDirectoryPath); 099 if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) { 100 throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 101 + ", does not exist or is not a directory."); 102 } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) { 103 throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath 104 + ", should be readable and writable."); 105 } 106 if (extraPropertiesStore() != null) { 107 LOGGER.warn("Extra properties store was specified but won't be used!"); 108 } 109 setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory)); 110 } 111 } 112 113 /** 114 * This method returns the object/document for the node with the federated arg 'id'. 115 * 116 * Additionally, this method adds Fedora datastream and content properties to the result of the parent class 117 * implementation. 118 */ 119 @Override 120 public Document getDocumentById(final String id) { 121 LOGGER.debug("Getting Federated document: {}", id); 122 if (null == id || id.isEmpty()) { 123 LOGGER.warn("Can not get document with null id"); 124 return null; 125 } 126 127 final Document doc = super.getDocumentById(id); 128 if ( doc == null ) { 129 LOGGER.debug("Non-existent node, document is null: {}", id); 130 return doc; 131 } 132 133 final DocumentReader docReader = readDocument(doc); 134 final DocumentWriter docWriter = writeDocument(doc); 135 final long lastmod = fileFor(id).lastModified(); 136 LOGGER.debug("Adding lastModified={}", lastmod); 137 docWriter.addProperty(JCR_LASTMODIFIED, lastmod); 138 139 final String primaryType = docReader.getPrimaryTypeName(); 140 141 if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) { 142 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id); 143 docWriter.addMixinType(FEDORA_RESOURCE); 144 } 145 146 // Is Fedora Datastream? 147 if (primaryType.equals(NT_FILE)) { 148 decorateDatastreamNode(docReader, docWriter); 149 150 // Is Fedora Content? 151 } else if (primaryType.equals(NT_RESOURCE)) { 152 decorateContentNode(docReader, docWriter, fileFor(id)); 153 154 // Is Fedora Object? 155 } else if (primaryType.equals(NT_FOLDER)) { 156 decorateObjectNode(docReader, docWriter); 157 } 158 159 return docWriter.document(); 160 } 161 162 /** 163 * Checks whether internally managed properties can and should be stored to 164 * an ExtraPropertiesStore. 165 * @return whether internally managed properties can and should be stored to 166 */ 167 protected boolean shouldCacheProperties() { 168 return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null); 169 } 170 171 172 /** 173 * Pass-thru to the parent class in order to make this function public 174 * 175 * @param id the node ID to test 176 * @return whether the id corresponds to the root location 177 */ 178 @Override 179 public boolean isRoot(final String id) { 180 return super.isRoot(id); 181 } 182 183 /** 184 * Pass-thru to the parent class in order to make this function public 185 * 186 * @param file the file used to compute a sha1 hash 187 * @return the sha1 hash of the file contents 188 */ 189 @Override 190 public String sha1(final File file) { 191 final String cachedSha1 = getCachedSha1(file); 192 if (cachedSha1 == null) { 193 return computeAndCacheSha1(file); 194 } 195 return cachedSha1; 196 } 197 198 private String getCachedSha1(final File file) { 199 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 200 if (extraPropertiesStore() != null) { 201 final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id); 202 final Name digestName = nameFrom(CONTENT_DIGEST); 203 if (extraProperties.containsKey(digestName)) { 204 if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) { 205 LOGGER.trace("Found sha1 for {} in extra properties store.", id); 206 final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString(); 207 return uriStr.substring(uriStr.indexOf("sha1:") + 5); 208 } 209 } 210 } else { 211 LOGGER.trace("No cache configured to contain object hashes."); 212 } 213 return null; 214 } 215 216 private String computeAndCacheSha1(final File file) { 217 final String id = idFor(file) + JCR_CONTENT_SUFFIX; 218 LOGGER.trace("Computing sha1 for {}.", id); 219 final String sha1 = super.sha1(file); 220 if (shouldCacheProperties()) { 221 final Map<Name, Property> updateMap = new HashMap<>(); 222 final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST), 223 asURI("SHA-1", sha1)); 224 final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED), 225 factories().getDateFactory().create(file.lastModified())); 226 updateMap.put(digestProperty.getName(), digestProperty); 227 updateMap.put(digestDateProperty.getName(), digestDateProperty); 228 extraPropertiesStore().updateProperties(id, updateMap); 229 } 230 return sha1; 231 } 232 233 private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) { 234 if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) { 235 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId()); 236 docWriter.addMixinType(FEDORA_CONTAINER); 237 } 238 } 239 240 private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) { 241 if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) { 242 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId()); 243 docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION); 244 } 245 } 246 247 private static void decorateContentNode(final DocumentReader docReader, 248 final DocumentWriter docWriter, 249 final File file) { 250 if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) { 251 LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId()); 252 docWriter.addMixinType(FEDORA_BINARY); 253 } 254 255 if (null == docReader.getProperty(CONTENT_DIGEST) 256 || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) { 257 final BinaryValue binaryValue = getBinaryValue(docReader); 258 final String dsChecksum = binaryValue.getHexHash(); 259 final String dsURI = asURI("SHA-1", dsChecksum).toString(); 260 261 LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId()); 262 docWriter.addProperty(CONTENT_DIGEST, dsURI); 263 } 264 265 if (null == docReader.getProperty(CONTENT_SIZE)) { 266 final long binarySize = file.length(); 267 LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId()); 268 docWriter.addProperty(CONTENT_SIZE, binarySize); 269 } 270 271 LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId()); 272 } 273 274 private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) { 275 if (lastModified == null) { 276 LOGGER.trace("Hash for {} has not been computed yet.", file.getName()); 277 return true; 278 } 279 final DateTime datetime = (DateTime) lastModified.getFirstValue(); 280 if (datetime.toDate().equals(new Date(file.lastModified()))) { 281 return false; 282 } 283 LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(), 284 new Date(file.lastModified()), datetime.toDate()); 285 return true; 286 } 287 288 private static BinaryValue getBinaryValue(final DocumentReader docReader) { 289 final Property binaryProperty = docReader.getProperty(JCR_DATA); 290 return (BinaryValue) binaryProperty.getFirstValue(); 291 } 292 293 /* Override write operations to also update the parent file's timestamp, so 294 its Last-Modified header correctly reflects changes to children. */ 295 @Override 296 public boolean removeDocument( final String id ) { 297 if ( super.removeDocument(id) ) { 298 touchParent(id); 299 return true; 300 } 301 return false; 302 } 303 304 @Override 305 public void storeDocument( final Document document ) { 306 super.storeDocument( document ); 307 touchParent(readDocument(document).getDocumentId()); 308 } 309 310 @Override 311 public void updateDocument( final DocumentChanges changes ) { 312 super.updateDocument( changes ); 313 touchParent( changes.getDocumentId() ); 314 } 315 316 /** 317 * Find the parent file, and set its timestamp to the current time. This 318 * timestamp will be used for populating the Last-Modified header. 319 * @param id the id 320 **/ 321 protected void touchParent( final String id ) { 322 if (!isRoot(id)) { 323 final File file = fileFor(id); 324 final File parent = file.getParentFile(); 325 parent.setLastModified(currentTimeMillis()); 326 } 327 } 328 329 /* Overriding so unit test can mock. */ 330 @Override 331 @VisibleForTesting 332 protected File fileFor( final String id ) { 333 return super.fileFor(id); 334 } 335 @Override 336 @VisibleForTesting 337 protected DocumentReader readDocument( final Document document ) { 338 return super.readDocument(document); 339 } 340 341 /* Overriding to make the FedoraFileSystemConnector is always read-only. */ 342 @Override 343 public boolean isReadonly() { 344 return true; 345 } 346 347 @Override 348 public boolean isContentNode(final String id) { 349 return super.isContentNode(id); 350 } 351 352}