/*
 * Licensed to DuraSpace under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.
 *
 * DuraSpace licenses this file to you under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fcrepo.kernel.api.identifiers;

import static org.fcrepo.kernel.api.FedoraTypes.FCR_ACL;
import static org.fcrepo.kernel.api.FedoraTypes.FCR_METADATA;
import static org.fcrepo.kernel.api.FedoraTypes.FCR_TOMBSTONE;
import static org.fcrepo.kernel.api.FedoraTypes.FCR_VERSIONS;
import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_ID_PREFIX;
import static org.fcrepo.kernel.api.services.VersionService.MEMENTO_LABEL_FORMATTER;

import java.time.Instant;
import java.time.format.DateTimeParseException;
import java.util.Arrays;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.fcrepo.kernel.api.exception.InvalidMementoPathException;
import org.fcrepo.kernel.api.exception.InvalidResourceIdentifierException;

import org.apache.commons.lang3.StringUtils;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.escape.Escaper;
import com.google.common.net.PercentEscaper;

/**
 * Class to store contextual information about a Fedora ID.
 *
 * Differentiates between the original ID of the request and the actual resource we are operating on.
 *
 * Resource Id : the shortened ID of the base resource, mostly needed to access the correct persistence object.
 * fullId : the full ID from the request, used in most cases.
 *
 * So a fullId of info:fedora/object1/another/fcr:versions/20000101121212 has an id of info:fedora/object1/another
 *
 * @author whikloj
 * @since 6.0.0
 */
public class FedoraId {

    /**
     * These are strings that can cause problems with our storage layout
     */
    private static final Set<String> FORBIDDEN_ID_PART_STRINGS = Set.of(
            "fcr-root",
            ".fcrepo",
            "fcr-container.nt"
    );

    /**
     * Suffixes that the final part of a base ID may not end with (an ID part that is exactly equal to one of
     * these strings is not rejected, see {@link #enforceStorageLayoutNamingConstraints()}).
     */
    private static final Set<String> FORBIDDEN_ID_PART_SUFFIXES = Set.of(
            "~fcr-desc",
            "~fcr-acl",
            "~fcr-desc.nt",
            "~fcr-acl.nt"
    );

    /**
     * One compiled pattern per extension name; used to detect an extension occurring more than once in an ID.
     */
    private static final Set<Pattern> extensions = Set.of(FCR_TOMBSTONE, FCR_METADATA, FCR_ACL, FCR_VERSIONS)
            .stream().map(Pattern::compile).collect(Collectors.toSet());

    /**
     * Escaper used to build the encoded form of the full ID; the listed characters are left unescaped.
     */
    private static final Escaper fedoraIdEscaper = new PercentEscaper("-._~!$'()*,;&=@:+/?#", false);

    /**
     * Matches one or more slashes at the end of a string. Compiled once instead of on every construction.
     */
    private static final Pattern TRAILING_SLASHES = Pattern.compile("/+$");

    /**
     * Matches the text that follows fcr:versions for a memento: a slash and exactly 14 digits.
     */
    private static final Pattern MEMENTO_SUFFIX = Pattern.compile("/\\d{14}");

    /**
     * The Fedora ID with prefix and extensions. eg info:fedora/object1/another/fcr:versions/20000101121212
     */
    private final String fullId;

    /**
     * The Fedora ID with prefix but without extensions. eg info:fedora/object1/another
     */
    private final String baseId;

    /**
     * The Fedora ID without prefix but with extensions. eg /object1/another/fcr:versions/20000101121212
     */
    private final String fullPath;

    /**
     * The Fedora ID prefix and extensions URL encoded.
     */
    private final String encodedFullId;

    // The following fields are populated by processIdentifier() while the constructor runs.
    private String hashUri;
    private boolean isRepositoryRoot = false;
    private boolean isNonRdfSourceDescription = false;
    private boolean isAcl = false;
    private boolean isMemento = false;
    private boolean isTimemap = false;
    private boolean isTombstone = false;
    private Instant mementoDatetime;
    private String mementoDatetimeStr;

    /**
     * Basic constructor. The ID is given the Fedora prefix if it lacks it, trailing slashes are removed, and the
     * result is validated and decomposed into its parts.
     *
     * @param fullId The full identifier or null if root.
     * @throws InvalidResourceIdentifierException If the ID combines or repeats extensions in a way that is not
     *         allowed, contains an empty path element, or violates the storage layout naming constraints.
     * @throws InvalidMementoPathException If the text following fcr:versions is not a valid memento datetime.
     */
    private FedoraId(final String fullId) {
        this.fullId = TRAILING_SLASHES.matcher(ensurePrefix(fullId)).replaceAll("");
        // Carry the path of the request for any exceptions.
        this.fullPath = this.fullId.substring(FEDORA_ID_PREFIX.length());
        checkForInvalidPath();
        this.baseId = processIdentifier();
        enforceStorageLayoutNamingConstraints();
        this.encodedFullId = fedoraIdEscaper.escape(this.fullId);
    }

    /**
     * Static create method
     * @param additions One or more strings to build an ID.
     * @return The FedoraId.
     */
    @JsonCreator
    public static FedoraId create(final String... additions) {
        return new FedoraId(idBuilder(additions));
    }

    /**
     * Get a FedoraId for repository root.
     * @return The FedoraId for repository root.
     */
    public static FedoraId getRepositoryRootId() {
        return new FedoraId(null);
    }

    /**
     * Is the identifier for the repository root.
     * @return true if the id is equal to the Fedora ID prefix with nothing after it.
     */
    public boolean isRepositoryRoot() {
        return isRepositoryRoot;
    }

    /**
     * Is the identifier for a Memento?
     * @return true if the id is for the fcr:versions endpoint and has a memento datetime string after it.
     */
    public boolean isMemento() {
        return isMemento;
    }

    /**
     * Is the identifier for an ACL?
     * @return true if the id is for the fcr:acl endpoint.
     */
    public boolean isAcl() {
        return isAcl;
    }

    /**
     * Is the identifier for a timemap?
     * @return true if id for the fcr:versions endpoint and NOT a memento.
     */
    public boolean isTimemap() {
        return isTimemap;
    }

    /**
     * Is the identifier for a nonRdfSourceDescription?
     * @return true if id for the fcr:metadata endpoint
     */
    public boolean isDescription() {
        return isNonRdfSourceDescription;
    }

    /**
     * Is the identifier for a tombstone
     * @return true if id for the fcr:tombstone endpoint
     */
    public boolean isTombstone() {
        return isTombstone;
    }

    /**
     * Is the identifier for a hash uri?
     * @return true if full id referenced a hash uri.
     */
    public boolean isHashUri() {
        return hashUri != null;
    }

    /**
     * Get the hash uri.
     * @return the hash uri from the id or null if none.
     */
    public String getHashUri() {
        return hashUri;
    }

    /**
     * Returns the ID string for the physical resource the Fedora ID describes. In most cases, this ID is the same as
     * the full resource ID. However, if the resource is a memento, timemap, or tombstone, then the ID returned here
     * will be for the resource that contains it. Here are some examples:
     *
     * <ul>
     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another/fcr:metadata"</li>
     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
     * </ul>
     *
     * @return the ID of the associated physical resource
     */
    public String getResourceId() {
        if (isNonRdfSourceDescription) {
            return baseId + "/" + FCR_METADATA;
        } else if (isAcl) {
            return baseId + "/" + FCR_ACL;
        }
        return baseId;
    }

    /**
     * Behaves the same as {@link #getResourceId()} except it returns a FedoraId rather than a String.
     *
     * @return the ID of the associated physical resource
     */
    public FedoraId asResourceId() {
        return FedoraId.create(getResourceId());
    }

    /**
     * Returns the ID string for the base ID the Fedora ID describes. This value is the equivalent of the full ID
     * with all extensions removed.
     *
     * <ul>
     *     <li>"info:fedora/object1/another/fcr:versions/20000101121212" =&gt; "info:fedora/object1/another"</li>
     *     <li>"info:fedora/object1/another/fcr:metadata" =&gt; "info:fedora/object1/another"</li>
     *     <li>"info:fedora/object1/another" =&gt; "info:fedora/object1/another"</li>
     * </ul>
     *
     * @return the ID of the associated base resource
     */
    public String getBaseId() {
        return baseId;
    }

    /**
     * Behaves the same as {@link #getBaseId()} except it returns a FedoraId rather than a String.
     *
     * @return the ID of the associated base resource
     */
    public FedoraId asBaseId() {
        return FedoraId.create(getBaseId());
    }

    /**
     * Return the original full ID.
     * @return the id.
     */
    public String getFullId() {
        return fullId;
    }

    /**
     * Return the original full ID without the info:fedora prefix.
     * @return the full id path part
     */
    public String getFullIdPath() {
        return fullPath;
    }

    /**
     * @return The encoded full ID.
     */
    public String getEncodedFullId() {
        return encodedFullId;
    }

    /**
     * Return the Memento datetime as Instant.
     * @return The datetime or null if not a memento.
     */
    public Instant getMementoInstant() {
        return mementoDatetime;
    }

    /**
     * Return the Memento datetime string.
     * @return The yyyymmddhhiiss memento datetime or null if not a Memento.
     */
    public String getMementoString() {
        return mementoDatetimeStr;
    }

    /**
     * Creates a new Fedora ID by joining the base ID of this Fedora ID with the specified string part. Any extensions
     * that this Fedora ID contains are discarded. For example:
     * <p>
     * Resolving "child" against "info:fedora/object1/another/fcr:versions/20000101121212" yields
     * "info:fedora/object1/another/child".
     *
     * @param child the part to join
     * @return new Fedora ID in the form baseId/child
     * @throws IllegalArgumentException if child is blank
     */
    public FedoraId resolve(final String child) {
        if (StringUtils.isBlank(child)) {
            throw new IllegalArgumentException("Child cannot be blank");
        }
        return FedoraId.create(baseId, child);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to an ACL resource. The base ID, full ID without extensions,
     * is always used to construct an ACL ID. If this ID is already an ACL, then it returns itself.
     *
     * @return ACL resource ID
     */
    public FedoraId asAcl() {
        if (isAcl()) {
            return this;
        }

        return FedoraId.create(getBaseId(), FCR_ACL);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a binary description resource. There is no guarantee that
     * the binary description resource exists. If this ID is already a description, then it returns itself. Otherwise,
     * it uses the base ID, without extensions, to construct the new ID. If this Fedora ID is a timemap or memento,
     * that extension is applied to the new description ID as well; a hash uri is carried over for mementos and
     * plain IDs.
     *
     * @return description resource ID
     */
    public FedoraId asDescription() {
        if (isDescription()) {
            return this;
        }

        if (isTimemap()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
        }

        if (isMemento()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(getMementoString()));
        }

        return FedoraId.create(getBaseId(), appendHashIfPresent(FCR_METADATA));
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a tombstone resource. If this ID is already a tombstone,
     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID.
     *
     * @return tombstone resource ID
     */
    public FedoraId asTombstone() {
        if (isTombstone()) {
            return this;
        }

        return FedoraId.create(getBaseId(), FCR_TOMBSTONE);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a timemap resource. If this ID is already a timemap,
     * then it returns itself. Otherwise, it uses the base ID, without extensions, to construct the new ID. Unless
     * this ID is a binary description, in which case the new ID is constructed using the description ID.
     *
     * @return timemap resource ID
     */
    public FedoraId asTimemap() {
        if (isTimemap()) {
            return this;
        }

        if (isDescription()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS);
        }

        return FedoraId.create(getBaseId(), FCR_VERSIONS);
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a memento resource. The instant is formatted as a
     * memento label and the work is delegated to {@link #asMemento(String)}.
     *
     * @param mementoInstant memento representation
     * @return memento resource ID
     */
    public FedoraId asMemento(final Instant mementoInstant) {
        return asMemento(MEMENTO_LABEL_FORMATTER.format(mementoInstant));
    }

    /**
     * Creates a new Fedora ID based on this ID that points to a memento resource. If this ID is already a memento,
     * then it returns itself. If this ID is an ACL, tombstone, or timemap, then the new ID is constructed using this
     * ID's base ID. If this ID is a description, then the new ID is appended to the description ID.
     *
     * @param mementoString string memento representation
     * @return memento resource ID
     */
    public FedoraId asMemento(final String mementoString) {
        if (isMemento()) {
            return this;
        }

        if (isDescription()) {
            return FedoraId.create(getBaseId(), FCR_METADATA, FCR_VERSIONS, appendHashIfPresent(mementoString));
        }

        if (isAcl() || isTombstone() || isTimemap()) {
            // No hash uri is carried over for these ID types.
            return FedoraId.create(getBaseId(), FCR_VERSIONS, mementoString);
        }

        return FedoraId.create(getBaseId(), FCR_VERSIONS, appendHashIfPresent(mementoString));
    }

    /**
     * Two Fedora IDs are equal when their full IDs are equal.
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj == this) {
            return true;
        }

        if (!(obj instanceof FedoraId)) {
            return false;
        }

        final var testObj = (FedoraId) obj;
        return Objects.equals(testObj.getFullId(), this.getFullId());
    }

    @Override
    public int hashCode() {
        return getFullId().hashCode();
    }

    /**
     * @return the full ID; this is also the value used for JSON serialization.
     */
    @JsonValue
    @Override
    public String toString() {
        return getFullId();
    }

    /**
     * Concatenates all the non-null parts with slashes, after removing at most one leading and one trailing slash
     * from each part.
     * @param parts array of strings
     * @return the concatenated string, or the empty string if there are no parts.
     */
    private static String idBuilder(final String... parts) {
        if (parts != null && parts.length > 0) {
            return Arrays.stream(parts).filter(Objects::nonNull)
                    .map(s -> s.startsWith("/") ? s.substring(1) : s)
                    .map(s -> s.endsWith("/") ? s.substring(0, s.length() - 1) : s)
                    .collect(Collectors.joining("/"));
        }
        return "";
    }

    /**
     * Ensure the ID has the Fedora ID prefix.
     * @param id the identifier, if null assume repository root
     * @return the identifier unchanged if it already starts with the prefix, otherwise the prefix, a slash and
     *         the identifier.
     */
    private static String ensurePrefix(final String id) {
        if (id == null) {
            return FEDORA_ID_PREFIX;
        }
        return id.startsWith(FEDORA_ID_PREFIX) ? id : FEDORA_ID_PREFIX + "/" + id;
    }

    /**
     * Process the original ID into its parts by successively stripping the hash, tombstone, ACL, versions and
     * metadata extensions, recording which ones were present in the corresponding fields.
     *
     * The accepted layout roughly corresponds to the following pattern (kept for reference only, it is not used):
     * ".*?(/" + FCR_METADATA + ")?(/" + FCR_VERSIONS + "(/\\d{14})?)?(/" + FCR_ACL + ")?(\\#\\S+)?$"
     * where the first group marks an fcr:metadata non-rdf source description, the second a memento or timemap,
     * the third the memento identifier, the fourth an ACL and the fifth any hashed suffix.
     *
     * @return the base ID, i.e. the full ID without any extensions.
     * @throws InvalidResourceIdentifierException if the ID has an empty path element, more than one '#', or an
     *         extension in an invalid position.
     * @throws InvalidMementoPathException if the text after fcr:versions is not a valid memento datetime.
     */
    private String processIdentifier() {
        if (this.fullId.contains("//")) {
            throw new InvalidResourceIdentifierException(String.format("Path contains empty element! %s", fullPath));
        }
        String processID = this.fullId;
        if (processID.equals(FEDORA_ID_PREFIX)) {
            this.isRepositoryRoot = true;
            return this.fullId;
        }
        if (processID.contains("#")) {
            final String[] hashSplits = StringUtils.splitPreserveAllTokens(processID, "#");
            if (hashSplits.length > 2) {
                throw new InvalidResourceIdentifierException(String.format(
                        "Path <%s> is invalid. It may not contain more than one #",
                        fullPath));
            }
            this.hashUri = hashSplits[1];
            processID = hashSplits[0];
        }
        if (processID.contains(FCR_TOMBSTONE)) {
            processID = removePart(processID, FCR_TOMBSTONE);
            this.isTombstone = true;
        }
        if (processID.contains(FCR_ACL)) {
            processID = removePart(processID, FCR_ACL);
            this.isAcl = true;
        }
        if (processID.contains(FCR_VERSIONS)) {
            final String[] versionSplits = split(processID, FCR_VERSIONS);
            if (versionSplits.length > 2) {
                throw new InvalidResourceIdentifierException(String.format(
                        "Path <%s> is invalid. May not contain multiple %s parts.",
                        fullPath, FCR_VERSIONS));
            }
            if (versionSplits.length < 2) {
                // The extension name occurs in the ID but does not start a path segment (the split is done on
                // "/" + FCR_VERSIONS), e.g. "objectfcr:versions". There is no second element to inspect, so
                // reject the ID explicitly rather than failing with an ArrayIndexOutOfBoundsException below.
                throw new InvalidResourceIdentifierException(String.format(
                        "Path <%s> is invalid. %s must be a separate path segment.",
                        fullPath, FCR_VERSIONS));
            }
            final String afterVersion = versionSplits[1];
            if (afterVersion.isEmpty() || afterVersion.equals("/")) {
                // Nothing (or only a trailing slash) after fcr:versions: this is the timemap itself.
                this.isTimemap = true;
            } else if (MEMENTO_SUFFIX.matcher(afterVersion).matches()) {
                this.isMemento = true;
                this.mementoDatetimeStr = afterVersion.substring(1);
                try {
                    this.mementoDatetime = Instant.from(MEMENTO_LABEL_FORMATTER.parse(this.mementoDatetimeStr));
                } catch (final DateTimeParseException e) {
                    throw new InvalidMementoPathException(String.format("Invalid request for memento at %s",
                            fullPath));
                }
            } else {
                throw new InvalidMementoPathException(String.format("Invalid request for memento at %s", fullPath));
            }
            processID = versionSplits[0];
        }
        if (processID.contains(FCR_METADATA)) {
            processID = removePart(processID, FCR_METADATA);
            this.isNonRdfSourceDescription = true;
        }
        if (processID.endsWith("/")) {
            processID = TRAILING_SLASHES.matcher(processID).replaceAll("");
        }

        return processID;
    }

    /**
     * Removes a trailing "/part" extension from the ID.
     *
     * NOTE(review): callers guard with contains(part) while the split is done on "/" + part, so an ID that contains
     * the part name without a preceding slash is returned unchanged here while the caller still sets its flag.
     * Confirm whether such IDs should be rejected.
     *
     * @param original the ID to process
     * @param part the extension name, without leading slash
     * @return the text before "/part"
     * @throws InvalidResourceIdentifierException if "/part" occurs more than once or is followed by other text.
     */
    private String removePart(final String original, final String part) {
        final String[] split = split(original, part);
        if (split.length > 2 || (split.length == 2 && !split[1].isEmpty())) {
            throw new InvalidResourceIdentifierException("Path is invalid:" + fullPath);
        }
        return split[0];
    }

    /**
     * Splits the ID on every occurrence of "/part", keeping empty tokens.
     *
     * @param original the ID to split
     * @param part the extension name, without leading slash
     * @return the tokens; a single-element array if "/part" does not occur.
     */
    private String[] split(final String original, final String part) {
        return StringUtils.splitByWholeSeparatorPreserveAllTokens(original, "/" + part);
    }

    /**
     * Check for obvious path errors.
     *
     * @throws InvalidResourceIdentifierException if the ID combines extensions that may not be used together or
     *         contains the same extension more than once.
     */
    private void checkForInvalidPath() {
        final boolean hasAcl = this.fullId.contains(FCR_ACL);
        final boolean hasTombstone = this.fullId.contains(FCR_TOMBSTONE);
        final boolean hasMetadata = this.fullId.contains(FCR_METADATA);
        final boolean hasVersions = this.fullId.contains(FCR_VERSIONS);

        // Check for combinations of endpoints not allowed:
        // ID contains fcr:acl or fcr:tombstone AND fcr:metadata or fcr:versions,
        // or ID contains fcr:acl AND fcr:tombstone.
        if (((hasAcl || hasTombstone) && (hasMetadata || hasVersions)) || (hasTombstone && hasAcl)) {
            throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
        }
        // Ensure we don't have 2 of any of the extensions, ie. info:fedora/object/fcr:acl/fcr:acl, etc.
        for (final Pattern extension : extensions) {
            if (extension.matcher(this.fullId).results().count() > 1) {
                throw new InvalidResourceIdentifierException(String.format("Path is invalid: %s", fullPath));
            }
        }
    }

    /**
     * Ensures that the Fedora ID does not violate any naming restrictions that are in place to prevent collisions
     * on disk. These restrictions are based on the following naming conventions:
     * https://wiki.lyrasis.org/display/FF/Design+-+Fedora+OCFL+Object+Structure
     *
     * Only the final part of the base ID is checked. All ids should be validated on resource creation.
     *
     * @throws InvalidResourceIdentifierException if the final part is a forbidden string or ends with (but is not
     *         equal to) a forbidden suffix.
     */
    private void enforceStorageLayoutNamingConstraints() {
        final var finalPart = StringUtils.substringAfterLast(baseId, "/");

        if (FORBIDDEN_ID_PART_STRINGS.contains(finalPart)) {
            throw new InvalidResourceIdentifierException(
                    String.format("Invalid resource ID. IDs may not contain the string '%s'.", finalPart));
        }

        for (final String suffix : FORBIDDEN_ID_PART_SUFFIXES) {
            if (finalPart.endsWith(suffix) && !finalPart.equals(suffix)) {
                throw new InvalidResourceIdentifierException(
                        String.format("Invalid resource ID. IDs may not end with '%s'.", suffix));
            }
        }
    }

    /**
     * Appends "#" and the hash uri of this ID to the given string, if this ID has a hash uri.
     *
     * @param original the string to append to
     * @return the string with the hash appended, or the string unchanged if this ID has no hash uri.
     */
    private String appendHashIfPresent(final String original) {
        if (isHashUri()) {
            return original + "#" + getHashUri();
        }
        return original;
    }

}