Training courses

Kernel and Embedded Linux

Bootlin training courses

Embedded Linux, kernel,
Yocto Project, Buildroot, real-time,
graphics, boot time, debugging...

Bootlin logo

Elixir Cross Referencer

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
/*-
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_disk.h,v 1.55 2008/11/13 02:18:43 dillon Exp $
 * $FreeBSD$
 */

#ifndef VFS_HAMMER_DISK_H_
#define VFS_HAMMER_DISK_H_

#include <sys/endian.h>

#ifndef _SYS_UUID_H_
#include <sys/uuid.h>
#endif

/*
 * The structures below represent the on-disk format for a HAMMER
 * filesystem.  Note that all fields for on-disk structures are naturally
 * aligned.  HAMMER uses little endian for fields in on-disk structures.
 * Big-endian architectures are not supported yet, but support is planned.
 *
 * Most of HAMMER revolves around the concept of an object identifier.  An
 * obj_id is a 64 bit quantity which uniquely identifies a filesystem object
 * FOR THE ENTIRE LIFE OF THE FILESYSTEM.  This uniqueness allows backups
 * and mirrors to retain varying amounts of filesystem history by removing
 * any possibility of conflict through identifier reuse.
 *
 * A HAMMER filesystem may span multiple volumes.
 *
 * A HAMMER filesystem uses a 16K filesystem buffer size.  All filesystem
 * I/O is done in multiples of 16K.
 *
 * 64K X-bufs are used for blocks >= a file's 1MB mark.
 *
 * Per-volume storage limit: 52 bits		4096 TB
 * Per-Zone storage limit: 60 bits		1 MTB
 * Per-filesystem storage limit: 60 bits	1 MTB
 */
/* Regular filesystem buffers are 16KB; extended (X) buffers are 64KB. */
#define HAMMER_BUFSIZE		(16 * 1024)
#define HAMMER_BUFMASK		(HAMMER_BUFSIZE - 1)
#define HAMMER_HBUFSIZE		(HAMMER_BUFSIZE >> 1)	/* half a buffer */
#define HAMMER_XBUFSIZE		(64 * 1024)
#define HAMMER_XBUFMASK		(HAMMER_XBUFSIZE - 1)
#define HAMMER_XDEMARC		(1 << 20)		/* 1MB */

/* 64-bit flavors of the sizes and masks above. */
#define HAMMER_BUFSIZE64	((uint64_t)HAMMER_BUFSIZE)
#define HAMMER_BUFMASK64	(HAMMER_BUFSIZE64 - 1)
#define HAMMER_XBUFSIZE64	((uint64_t)HAMMER_XBUFSIZE)
#define HAMMER_XBUFMASK64	(HAMMER_XBUFSIZE64 - 1)

/* Bit fields of a 64-bit offset: 4-bit zone, 8-bit volume, 52-bit offset. */
#define HAMMER_OFF_ZONE_MASK	(0xFULL << 60)		/* zone portion */
#define HAMMER_OFF_VOL_MASK	(0xFFULL << 52)		/* volume portion */
#define HAMMER_OFF_SHORT_MASK	((1ULL << 52) - 1)	/* offset portion */
#define HAMMER_OFF_LONG_MASK	((1ULL << 60) - 1)	/* offset portion */

/* All-ones offset value. */
#define HAMMER_OFF_BAD		(~(hammer_off_t)0)

/*
 * Round an offset up to the next buffer (or X-buffer) boundary.  The
 * plain variants work with the int-typed masks, the 64 variants with
 * the uint64_t masks.
 */
#define HAMMER_BUFSIZE_DOALIGN(offset)				\
	(~HAMMER_BUFMASK & ((offset) + HAMMER_BUFMASK))
#define HAMMER_BUFSIZE64_DOALIGN(offset)			\
	(~HAMMER_BUFMASK64 & ((offset) + HAMMER_BUFMASK64))

#define HAMMER_XBUFSIZE_DOALIGN(offset)				\
	(~HAMMER_XBUFMASK & ((offset) + HAMMER_XBUFMASK))
#define HAMMER_XBUFSIZE64_DOALIGN(offset)			\
	(~HAMMER_XBUFMASK64 & ((offset) + HAMMER_XBUFMASK64))

/*
 * A HAMMER filesystem is limited to 2^8 volumes; the volume number
 * occupies 8 bits of an encoded offset (see HAMMER_OFF_VOL_MASK).
 */
#define HAMMER_MAX_VOLUMES	(1 << 8)

/*
 * Space set aside after the volume header for (future) header junk.
 * The minimum and maximum are currently identical, so the size is fixed.
 */
#define HAMMER_VOL_JUNK_SIZE	HAMMER_MIN_VOL_JUNK
#define HAMMER_MAX_VOL_JUNK	HAMMER_MIN_VOL_JUNK
#define HAMMER_MIN_VOL_JUNK	(16 * HAMMER_BUFSIZE)	/* 256 KB */

/*
 * Hammer transaction ids are 64 bit unsigned integers and are usually
 * synchronized with the time of day in nanoseconds.
 *
 * Hammer offsets are used for FIFO indexing and embed a cycle counter
 * and volume number in addition to the offset.  Most offsets are required
 * to be 16 KB aligned.
 */
typedef uint64_t hammer_tid_t;	/* transaction id */
typedef uint64_t hammer_off_t;	/* encoded offset (zone bits + payload) */
typedef uint32_t hammer_crc_t;	/* 32-bit CRC field */
typedef uuid_t hammer_uuid_t;	/* alias of the system uuid_t */

/*
 * Limits of the tid, key, obj_id, record type and offset spaces.
 *
 * The signed minimum is built as (-MAX - 1) on purpose: the literal
 * 0x8000000000000000LL does not fit in long long, so it has an unsigned
 * type and negating it yields a large positive unsigned value rather
 * than the most negative signed one.  The expression is parenthesized
 * so that it is safe inside larger expressions.
 */
#define HAMMER_MIN_TID		0ULL			/* unsigned */
#define HAMMER_MAX_TID		0xFFFFFFFFFFFFFFFFULL	/* unsigned */
#define HAMMER_MIN_KEY		(-0x7FFFFFFFFFFFFFFFLL - 1)	/* signed */
#define HAMMER_MAX_KEY		0x7FFFFFFFFFFFFFFFLL	/* signed */
#define HAMMER_MIN_OBJID	HAMMER_MIN_KEY		/* signed */
#define HAMMER_MAX_OBJID	HAMMER_MAX_KEY		/* signed */
#define HAMMER_MIN_RECTYPE	0x0U			/* unsigned */
#define HAMMER_MAX_RECTYPE	0xFFFFU			/* unsigned */
#define HAMMER_MIN_OFFSET	0ULL			/* unsigned */
#define HAMMER_MAX_OFFSET	0xFFFFFFFFFFFFFFFFULL	/* unsigned */

/*
 * hammer_off_t has several different encodings.  Note that not all zones
 * encode a vol_no.  Zone bits are not a part of filesystem capacity as
 * the zone bits aren't directly or indirectly mapped to physical volumes.
 *
 * In other words, HAMMER's logical filesystem offset consists of 64 bits,
 * but the filesystem is considered a 60-bit filesystem, not a 64-bit one.
 * The maximum filesystem capacity is 1EB, not 16EB.
 *
 * zone 0:		available, a big-block that contains the offset is unused
 * zone 1 (z,v,o):	raw volume relative (offset 0 is the volume header)
 * zone 2 (z,v,o):	raw buffer relative (offset 0 is the first buffer)
 * zone 3 (z,o):	undo/redo fifo	- fixed zone-2 offset array in volume header
 * zone 4 (z,v,o):	freemap		- only real blockmap
 * zone 8 (z,v,o):	B-Tree		- actually zone-2 address
 * zone 9 (z,v,o):	meta		- actually zone-2 address
 * zone 10 (z,v,o):	large-data	- actually zone-2 address
 * zone 11 (z,v,o):	small-data	- actually zone-2 address
 * zone 15:		unavailable, usually the offset is beyond volume size
 *
 * layer1/layer2 direct map:
 *	     Maximum HAMMER filesystem capacity from volume aspect
 *	     2^8(max volumes) * 2^52(max volume size) = 2^60 = 1EB (long offset)
 *	    <------------------------------------------------------------->
 *	     8bits   52bits (short offset)
 *	    <------><----------------------------------------------------->
 *	zzzzvvvvvvvvoooo oooooooooooooooo oooooooooooooooo oooooooooooooooo
 *	----111111111111 1111112222222222 222222222ooooooo oooooooooooooooo
 *	    <-----------------><------------------><---------------------->
 *	     18bits             19bits              23bits
 *	    <------------------------------------------------------------->
 *	     2^18(layer1) * 2^19(layer2) * 2^23(big-block) = 2^60 = 1EB
 *	     Maximum HAMMER filesystem capacity from blockmap aspect
 *
 * volume#0 layout
 *	+-------------------------> offset 0 of a device/partition
 *	| volume header (1928 bytes)
 *	| the rest of header junk space (HAMMER_BUFSIZE aligned)
 *	+-------------------------> vol_bot_beg
 *	| boot area (HAMMER_BUFSIZE aligned)
 *	+-------------------------> vol_mem_beg
 *	| memory log (HAMMER_BUFSIZE aligned)
 *	+-------------------------> vol_buf_beg (physical offset of zone-2)
 *	| zone-4 big-block for layer1
 *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE
 *	| zone-4 big-blocks for layer2
 *	| ... (1 big-block per 4TB space)
 *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
 *	| zone-3 big-blocks for UNDO/REDO FIFO
 *	| ... (max 128 big-blocks)
 *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
 *	| zone-8 big-block for root B-Tree node/etc
 *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
 *	| zone-9 big-block for root inode/PFS/etc
 *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
 *	| zone-X big-blocks
 *	| ... (big-blocks for new zones after newfs_hammer)
 *	| ...
 *	| ...
 *	| ...
 *	| ...
 *	+-------------------------> vol_buf_end (HAMMER_BUFSIZE aligned)
 *	+-------------------------> end of a device/partition
 *
 * volume#N layout (0<N<256)
 *	+-------------------------> offset 0 of a device/partition
 *	| volume header (1928 bytes)
 *	| the rest of header junk space (HAMMER_BUFSIZE aligned)
 *	+-------------------------> vol_bot_beg
 *	| boot area (HAMMER_BUFSIZE aligned)
 *	+-------------------------> vol_mem_beg
 *	| memory log (HAMMER_BUFSIZE aligned)
 *	+-------------------------> vol_buf_beg (physical offset of zone-2)
 *	| zone-4 big-blocks for layer2
 *	| ... (1 big-block per 4TB space)
 *	+-------------------------> vol_buf_beg + HAMMER_BIGBLOCK_SIZE * ...
 *	| zone-X big-blocks
 *	| ... (unused until volume#(N-1) runs out of space)
 *	| ...
 *	| ...
 *	| ...
 *	| ...
 *	+-------------------------> vol_buf_end (HAMMER_BUFSIZE aligned)
 *	+-------------------------> end of a device/partition
 */

/*
 * Zone ids positioned in the top four bits of a 64-bit offset
 * (the bits selected by 0xF000000000000000).
 */
#define HAMMER_ZONE_RAW_VOLUME		(0x1ULL << 60)
#define HAMMER_ZONE_RAW_BUFFER		(0x2ULL << 60)
#define HAMMER_ZONE_UNDO		(0x3ULL << 60)
#define HAMMER_ZONE_FREEMAP		(0x4ULL << 60)
#define HAMMER_ZONE_RESERVED05		(0x5ULL << 60)	/* not used */
#define HAMMER_ZONE_RESERVED06		(0x6ULL << 60)	/* not used */
#define HAMMER_ZONE_RESERVED07		(0x7ULL << 60)	/* not used */
#define HAMMER_ZONE_BTREE		(0x8ULL << 60)
#define HAMMER_ZONE_META		(0x9ULL << 60)
#define HAMMER_ZONE_LARGE_DATA		(0xAULL << 60)
#define HAMMER_ZONE_SMALL_DATA		(0xBULL << 60)
#define HAMMER_ZONE_RESERVED0C		(0xCULL << 60)	/* not used */
#define HAMMER_ZONE_RESERVED0D		(0xDULL << 60)	/* not used */
#define HAMMER_ZONE_RESERVED0E		(0xEULL << 60)	/* not used */
#define HAMMER_ZONE_UNAVAIL		(0xFULL << 60)

/* The same zone ids as plain (unshifted) 4-bit integers. */
#define HAMMER_ZONE_RAW_VOLUME_INDEX	1
#define HAMMER_ZONE_RAW_BUFFER_INDEX	2
#define HAMMER_ZONE_UNDO_INDEX		3
#define HAMMER_ZONE_FREEMAP_INDEX	4
#define HAMMER_ZONE_BTREE_INDEX		8
#define HAMMER_ZONE_META_INDEX		9
#define HAMMER_ZONE_LARGE_DATA_INDEX	10
#define HAMMER_ZONE_SMALL_DATA_INDEX	11
#define HAMMER_ZONE_UNAVAIL_INDEX	15

/* Number of distinct values a 4-bit zone id can take. */
#define HAMMER_MAX_ZONES		16

/* Extract the zone bits of an offset, still in their encoded position. */
#define HAMMER_ZONE(offset)		(HAMMER_OFF_ZONE_MASK & (offset))

/*
 * Zone predicates on an encoded offset.  Each compares the decoded
 * 4-bit zone id against the corresponding HAMMER_ZONE_*_INDEX.
 */
#define hammer_is_zone_raw_volume(offset)		\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_RAW_VOLUME_INDEX)
#define hammer_is_zone_raw_buffer(offset)		\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_RAW_BUFFER_INDEX)
#define hammer_is_zone_undo(offset)			\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_UNDO_INDEX)
#define hammer_is_zone_freemap(offset)			\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_FREEMAP_INDEX)
#define hammer_is_zone_btree(offset)			\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_BTREE_INDEX)
#define hammer_is_zone_meta(offset)			\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_META_INDEX)
#define hammer_is_zone_large_data(offset)		\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_LARGE_DATA_INDEX)
#define hammer_is_zone_small_data(offset)		\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_SMALL_DATA_INDEX)
#define hammer_is_zone_unavail(offset)			\
	(HAMMER_ZONE_DECODE(offset) == HAMMER_ZONE_UNAVAIL_INDEX)

/* Either of the two data zones. */
#define hammer_is_zone_data(offset)			\
	(hammer_is_zone_large_data(offset) ||		\
	 hammer_is_zone_small_data(offset))

/*
 * A zone index counts as a "record" zone when it lies in the range
 * [HAMMER_ZONE_BTREE_INDEX, HAMMER_MAX_ZONES).
 */
#define hammer_is_index_record(zone)			\
	(HAMMER_ZONE_BTREE_INDEX <= (zone) &&		\
	 HAMMER_MAX_ZONES > (zone))

#define hammer_is_zone_record(offset)			\
	hammer_is_index_record(HAMMER_ZONE_DECODE(offset))

/*
 * Zone indexes treated as directly translatable: raw-buffer, freemap
 * and every record zone.
 */
#define hammer_is_index_direct_xlated(zone)		\
	((zone) == HAMMER_ZONE_RAW_BUFFER_INDEX ||	\
	 (zone) == HAMMER_ZONE_FREEMAP_INDEX ||		\
	 hammer_is_index_record(zone))

#define hammer_is_zone_direct_xlated(offset)		\
	hammer_is_index_direct_xlated(HAMMER_ZONE_DECODE(offset))

/* Zone id <-> top four bits (60..63) of an offset. */
#define HAMMER_ZONE_ENCODE(zone, ham_off)		\
	((ham_off) | ((hammer_off_t)(zone) << 60))
#define HAMMER_ZONE_DECODE(ham_off)			\
	((int)((hammer_off_t)(ham_off) >> 60))

/* Volume number <-> bits 52..59 of an offset (only the low 8 bits kept). */
#define HAMMER_VOL_ENCODE(vol_no)			\
	(((hammer_off_t)((vol_no) & 0xFF)) << 52)
#define HAMMER_VOL_DECODE(ham_off)			\
	((int)(0xFF & ((hammer_off_t)(ham_off) >> 52)))

/* Keep only the 52-bit (short) or 60-bit (long) offset portion. */
#define HAMMER_OFF_SHORT_ENCODE(offset)			\
	(HAMMER_OFF_SHORT_MASK & (hammer_off_t)(offset))
#define HAMMER_OFF_LONG_ENCODE(offset)			\
	(HAMMER_OFF_LONG_MASK & (hammer_off_t)(offset))

/*
 * Assemble a full (zone, volume, short offset) address.  The zone is
 * given as an index (0-15), not as a pre-shifted HAMMER_ZONE_* value.
 */
#define HAMMER_ENCODE(zone, vol_no, offset)		\
	HAMMER_ZONE_ENCODE((zone),			\
	    HAMMER_VOL_ENCODE(vol_no) |			\
	    HAMMER_OFF_SHORT_ENCODE(offset))

/* Convenience encoders for the individual zones. */
#define HAMMER_ENCODE_RAW_VOLUME(vol_no, offset)	\
	HAMMER_ENCODE(HAMMER_ZONE_RAW_VOLUME_INDEX, vol_no, offset)
#define HAMMER_ENCODE_RAW_BUFFER(vol_no, offset)	\
	HAMMER_ENCODE(HAMMER_ZONE_RAW_BUFFER_INDEX, vol_no, offset)
/* UNDO addresses always carry HAMMER_ROOT_VOLNO as their volume. */
#define HAMMER_ENCODE_UNDO(offset)			\
	HAMMER_ENCODE(HAMMER_ZONE_UNDO_INDEX, HAMMER_ROOT_VOLNO, offset)
#define HAMMER_ENCODE_FREEMAP(vol_no, offset)		\
	HAMMER_ENCODE(HAMMER_ZONE_FREEMAP_INDEX, vol_no, offset)

/*
 * Re-tag an offset with another zone: the existing zone bits are dropped
 * (only the low 60 bits are kept) and the requested zone index is
 * encoded in their place.
 */
#define hammer_xlate_to_zoneX(zone, offset)		\
	HAMMER_ZONE_ENCODE((zone), (offset) & HAMMER_OFF_LONG_MASK)
#define hammer_xlate_to_zone2(offset)			\
	hammer_xlate_to_zoneX(HAMMER_ZONE_RAW_BUFFER_INDEX, (offset))

/*
 * Select the data zone for a given length: anything shorter than
 * HAMMER_BUFSIZE goes to the small-data zone, everything else to the
 * large-data zone.  The first form yields the shifted zone value, the
 * second the zone index.
 */
#define hammer_data_zone(data_len)			\
	(((data_len) < HAMMER_BUFSIZE) ?		\
	 HAMMER_ZONE_SMALL_DATA :			\
	 HAMMER_ZONE_LARGE_DATA)
#define hammer_data_zone_index(data_len)		\
	(((data_len) < HAMMER_BUFSIZE) ?		\
	 HAMMER_ZONE_SMALL_DATA_INDEX :			\
	 HAMMER_ZONE_LARGE_DATA_INDEX)

/*
 * Big-Block backing store
 *
 * A blockmap is a two-level map which translates a blockmap-backed zone
 * offset into a raw zone 2 offset.  The layer 1 handles 18 bits and the
 * layer 2 handles 19 bits.  The 8M big-block size is 23 bits so two
 * layers gives us 18+19+23 = 60 bits of address space.
 *
 * When using hinting for a blockmap lookup, the hint is lost when the
 * scan leaves the HINTBLOCK, which is typically several BIGBLOCK's.
 * HINTBLOCK is a heuristic.
 */
/* An 8MB big-block, addressed with 23 bits. */
#define HAMMER_BIGBLOCK_BITS		23
#define HAMMER_BIGBLOCK_SIZE		(8 * 1024 * 1024)
#define HAMMER_BIGBLOCK_SIZE64		((uint64_t)HAMMER_BIGBLOCK_SIZE)
#define HAMMER_BIGBLOCK_MASK		(HAMMER_BIGBLOCK_SIZE - 1)
#define HAMMER_BIGBLOCK_MASK64		(HAMMER_BIGBLOCK_SIZE64 - 1)
#if 0
#define HAMMER_BIGBLOCK_OVERFILL	(6144 * 1024)
#endif
/* The size and the bit count must describe the same quantity. */
#if HAMMER_BIGBLOCK_SIZE != (1 << HAMMER_BIGBLOCK_BITS)
#error "HAMMER_BIGBLOCK_BITS BROKEN"
#endif

/* A hint block spans four big-blocks. */
#define HAMMER_HINTBLOCK_SIZE		(4 * HAMMER_BIGBLOCK_SIZE)
#define HAMMER_HINTBLOCK_MASK64		((uint64_t)HAMMER_HINTBLOCK_SIZE - 1)

/* Number of HAMMER_BUFSIZE buffers in one big-block, and its masks. */
#define HAMMER_BUFFERS_PER_BIGBLOCK			\
	(HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE)
#define HAMMER_BUFFERS_PER_BIGBLOCK_MASK		\
	(HAMMER_BUFFERS_PER_BIGBLOCK - 1)
#define HAMMER_BUFFERS_PER_BIGBLOCK_MASK64		\
	((hammer_off_t)HAMMER_BUFFERS_PER_BIGBLOCK_MASK)

/* Round an offset up to the next big-block boundary (64-bit math). */
#define HAMMER_BIGBLOCK_DOALIGN(offset)				\
	(~HAMMER_BIGBLOCK_MASK64 & ((offset) + HAMMER_BIGBLOCK_MASK64))

/*
 * Maximum number of mirrors operating in master mode (multi-master
 * clustering and mirroring).
 *
 * NOTE: HAMMER1 does not support multi-master clustering as of 2015.
 */
#define HAMMER_MAX_MASTERS		16

/*
 * The blockmap is somewhat of a degenerate structure.  HAMMER only actually
 * uses it in its original incarnation to implement the freemap.
 *
 * zone:1	raw volume (no blockmap)
 * zone:2	raw buffer (no blockmap)
 * zone:3	undomap    (direct layer2 array in volume header)
 * zone:4	freemap    (the only real blockmap)
 * zone:8-15	zone id used to classify big-block only, address is actually
 *		a zone-2 address.
 */
typedef struct hammer_blockmap {
	hammer_off_t	phys_offset;  /* zone-2 offset only used by zone-4 */
	hammer_off_t	first_offset; /* zone-X offset only used by zone-3 */
	hammer_off_t	next_offset;  /* zone-X offset for allocation */
	hammer_off_t	alloc_offset; /* zone-X offset only used by zone-3 */
	uint32_t	reserved01;
	hammer_crc_t	entry_crc;    /* see HAMMER_BLOCKMAP_CRCSIZE */
} *hammer_blockmap_t;

/*
 * Number of bytes that precede entry_crc, i.e. every field of the
 * structure except the CRC itself.
 * NOTE(review): presumably the length fed to the CRC routine -- confirm
 * against the code that computes entry_crc.
 */
#define HAMMER_BLOCKMAP_CRCSIZE	\
	offsetof(struct hammer_blockmap, entry_crc)

/*
 * The blockmap is a 2-layer entity made up of big-blocks.  The first layer
 * contains 262144 32-byte entries (18 bits), the second layer contains
 * 524288 16-byte entries (19 bits), representing 8MB (23 bit) blockmaps.
 * 18+19+23 = 60 bits.  The top four bits are the zone id.
 *
 * Currently only the freemap utilizes both layers in all their glory.
 * All primary data/meta-data zones actually encode a zone-2 address
 * requiring no real blockmap translation.
 *
 * The freemap uses the upper 8 bits of layer-1 to identify the volume,
 * thus any space allocated via the freemap can be directly translated
 * to a zone:2 (or zone:8-15) address.
 *
 * zone-X blockmap offset: [zone:4][layer1:18][layer2:19][big-block:23]
 */

/*
 * 32 bytes layer1 entry for 8MB big-block.
 * A big-block can hold 2^23 / 2^5 = 2^18 layer1 entries,
 * which equals bits assigned for layer1 in zone-2 address.
 */
typedef struct hammer_blockmap_layer1 {
	hammer_off_t	blocks_free;	/* big-blocks free */
	hammer_off_t	phys_offset;	/* UNAVAIL or zone-2 */
	hammer_off_t	reserved01;
	hammer_crc_t	layer2_crc;	/* xor'd crc's of HAMMER_BLOCKSIZE */
					/* (not yet used) */
	hammer_crc_t	layer1_crc;	/* MUST BE LAST FIELD OF STRUCTURE */
} *hammer_blockmap_layer1_t;

/*
 * Number of bytes that precede layer1_crc (layer2_crc included).
 * NOTE(review): presumably the length covered by layer1_crc -- confirm
 * against the code that computes it.
 */
#define HAMMER_LAYER1_CRCSIZE	\
	offsetof(struct hammer_blockmap_layer1, layer1_crc)

/*
 * 16 bytes layer2 entry for 8MB big-blocks.
 * A big-block can hold 2^23 / 2^4 = 2^19 layer2 entries,
 * which equals bits assigned for layer2 in zone-2 address.
 *
 * NOTE: bytes_free is signed and can legally go negative if/when data
 *	 de-dup occurs.  This field will never go higher than
 *	 HAMMER_BIGBLOCK_SIZE.  If exactly HAMMER_BIGBLOCK_SIZE
 *	 the big-block is completely free.
 */
typedef struct hammer_blockmap_layer2 {
	uint8_t		zone;		/* typed allocation zone */
	uint8_t		reserved01;
	uint16_t	reserved02;
	uint32_t	append_off;	/* allocatable space index */
	int32_t		bytes_free;	/* bytes free within this big-block */
					/* (signed, may go negative) */
	hammer_crc_t	entry_crc;	/* see HAMMER_LAYER2_CRCSIZE */
} *hammer_blockmap_layer2_t;

/*
 * Number of bytes that precede entry_crc.
 * NOTE(review): presumably the length covered by entry_crc -- confirm
 * against the code that computes it.
 */
#define HAMMER_LAYER2_CRCSIZE	\
	offsetof(struct hammer_blockmap_layer2, entry_crc)

/* All-ones sentinel offset. */
#define HAMMER_BLOCKMAP_UNAVAIL	(~(hammer_off_t)0)

/*
 * Entries per big-block for each layer: one big-block divided by the
 * size of that layer's entry (2^18 = 262144 and 2^19 = 524288).
 */
#define HAMMER_BLOCKMAP_RADIX1						\
	((int)(HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer1)))
#define HAMMER_BLOCKMAP_RADIX2						\
	((int)(HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer2)))

/*
 * Bytes of address space mapped by one big-block worth of entries:
 *	LAYER2: 2^(19+23)    = 4TB
 *	LAYER1: 2^(18+19+23) = 1EB
 */
#define HAMMER_BLOCKMAP_LAYER2						\
	(HAMMER_BIGBLOCK_SIZE64 * HAMMER_BLOCKMAP_RADIX2)
#define HAMMER_BLOCKMAP_LAYER1						\
	(HAMMER_BLOCKMAP_LAYER2 * HAMMER_BLOCKMAP_RADIX1)

#define HAMMER_BLOCKMAP_LAYER2_MASK	(HAMMER_BLOCKMAP_LAYER2 - 1)
#define HAMMER_BLOCKMAP_LAYER1_MASK	(HAMMER_BLOCKMAP_LAYER1 - 1)

/* Round an offset up to the next HAMMER_BLOCKMAP_LAYER2 (4TB) boundary. */
#define HAMMER_BLOCKMAP_LAYER2_DOALIGN(offset)			\
	(~HAMMER_BLOCKMAP_LAYER2_MASK &				\
	 ((offset) + HAMMER_BLOCKMAP_LAYER2_MASK))

/*
 * Entry index, within a layer1 or layer2 big-block, that represents a
 * given zone-2 physical offset.  The zone bits are masked off first;
 * the layer1 index keeps the volume bits as its upper bits.
 */
#define HAMMER_BLOCKMAP_LAYER1_INDEX(zone2_offset)		\
	((int)((HAMMER_BLOCKMAP_LAYER1_MASK & (zone2_offset)) /	\
	 HAMMER_BLOCKMAP_LAYER2))

#define HAMMER_BLOCKMAP_LAYER2_INDEX(zone2_offset)		\
	((int)((HAMMER_BLOCKMAP_LAYER2_MASK & (zone2_offset)) /	\
	 HAMMER_BIGBLOCK_SIZE64))

/*
 * Byte offset of that entry within its big-block: the entry size
 * multiplied by the index computed above.
 */
#define HAMMER_BLOCKMAP_LAYER1_OFFSET(zone2_offset)		\
	(sizeof(struct hammer_blockmap_layer1) *		\
	 HAMMER_BLOCKMAP_LAYER1_INDEX(zone2_offset))

#define HAMMER_BLOCKMAP_LAYER2_OFFSET(zone2_offset)		\
	(sizeof(struct hammer_blockmap_layer2) *		\
	 HAMMER_BLOCKMAP_LAYER2_INDEX(zone2_offset))

/*
 * Move on to offset 0 of the next layer1 or layer2 entry: advance by
 * one entry's span (HAMMER_BLOCKMAP_LAYER2 bytes for layer1, one
 * big-block for layer2) and truncate to that span's boundary.
 *
 * Both additions use 64-bit step values so that the sum is computed in
 * 64 bits whatever the width of the argument; adding the int-typed
 * HAMMER_BIGBLOCK_SIZE to a 32-bit argument could wrap before the
 * 64-bit mask was applied.
 */
#define HAMMER_ZONE_LAYER1_NEXT_OFFSET(offset)			\
	(((offset) + HAMMER_BLOCKMAP_LAYER2) & ~HAMMER_BLOCKMAP_LAYER2_MASK)

#define HAMMER_ZONE_LAYER2_NEXT_OFFSET(offset)			\
	(((offset) + HAMMER_BIGBLOCK_SIZE64) & ~HAMMER_BIGBLOCK_MASK64)

/*
 * HAMMER UNDO parameters.  The UNDO fifo is mapped directly in the volume
 * header with an array of zone-2 offsets.  A maximum of (128x8MB) = 1GB,
 * and a minimum of (64x8MB) = 512MB may be reserved.  The size of the undo
 * fifo is usually set at newfs time.
 */
#define HAMMER_MIN_UNDO_BIGBLOCKS		64
#define HAMMER_MAX_UNDO_BIGBLOCKS		128

/*
 * All on-disk HAMMER structures which make up elements of the UNDO FIFO
 * contain a hammer_fifo_head and hammer_fifo_tail structure.  This structure
 * contains all the information required to validate the fifo element
 * and to scan the fifo in either direction.  The head is typically embedded
 * in higher level hammer on-disk structures while the tail is typically
 * out-of-band.  hdr_size is the size of the whole mess, including the tail.
 *
 * All undo structures are guaranteed to not cross a 16K filesystem
 * buffer boundary.  Most undo structures are fairly small.  Data spaces
 * are not immediately reused by HAMMER so file data is not usually recorded
 * as part of an UNDO.
 *
 * PAD elements are allowed to take up only 8 bytes of space as a special
 * case, containing only hdr_signature, hdr_type, and hdr_size fields,
 * and with the tail overloaded onto the head structure for 8 bytes total.
 *
 * Every undo record has a sequence number.  This number is unrelated to
 * transaction ids and instead collects the undo transactions associated
 * with a single atomic operation.  A larger transactional operation, such
 * as a remove(), may consist of several smaller atomic operations
 * representing raw meta-data operations.
 *
 *				HAMMER VERSION 4 CHANGES
 *
 * In HAMMER version 4 the undo structure alignment is reduced from 16384
 * to 512 bytes in order to ensure that each 512 byte sector begins with
 * a header.  The hdr_seq field in the header is a 32 bit sequence number
 * which allows the recovery code to detect missing sectors
 * without relying on the 32-bit crc and to definitively identify the current
 * undo sequence space without having to rely on information from the volume
 * header.  In addition, new REDO entries in the undo space are used to
 * record write, write/extend, and transaction id updates.
 *
 * The grand result is:
 *
 * (1) The volume header no longer needs to be synchronized for most
 *     flush and fsync operations.
 *
 * (2) Most fsync operations need only lay down REDO records
 *
 * (3) Data overwrite for nohistory operations covered by REDO records
 *     can be supported (instead of rolling a new block allocation),
 *     by rolling UNDO for the prior contents of the data.
 *
 *				HAMMER VERSION 5 CHANGES
 *
 * Hammer version 5 contains a minor adjustment making layer2's bytes_free
 * field signed, allowing dedup to push it into the negative domain.
 */
/*
 * FIFO head alignment: round a byte count up to a multiple of 8.
 */
#define HAMMER_HEAD_ALIGN		8
#define HAMMER_HEAD_ALIGN_MASK		(HAMMER_HEAD_ALIGN - 1)
#define HAMMER_HEAD_DOALIGN(bytes)	\
	(((bytes) + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK)

/*
 * UNDO record alignment: round an offset up to a multiple of 512.
 *
 * HAMMER_UNDO_DOALIGN always yields a 64-bit result because it masks
 * with HAMMER_UNDO_MASK64.  The addition uses the 64-bit mask as well,
 * so the sum is computed in 64 bits; adding the int-typed mask to a
 * 32-bit argument could wrap before the result was widened.
 */
#define HAMMER_UNDO_ALIGN		512
#define HAMMER_UNDO_ALIGN64		((uint64_t)512)
#define HAMMER_UNDO_MASK		(HAMMER_UNDO_ALIGN - 1)
#define HAMMER_UNDO_MASK64		(HAMMER_UNDO_ALIGN64 - 1)
#define HAMMER_UNDO_DOALIGN(offset)	\
	(((offset) + HAMMER_UNDO_MASK64) & ~HAMMER_UNDO_MASK64)

/*
 * Header placed at the start of a FIFO element (16 bytes).
 * NOTE(review): hdr_signature and hdr_type presumably hold
 * HAMMER_HEAD_SIGNATURE and one of HAMMER_HEAD_TYPE_* -- confirm.
 */
typedef struct hammer_fifo_head {
	uint16_t hdr_signature;
	uint16_t hdr_type;
	uint32_t hdr_size;	/* Aligned size of the whole mess */
	uint32_t hdr_seq;	/* Sequence number */
	hammer_crc_t hdr_crc;	/* XOR crc up to field w/ crc after field */
} *hammer_fifo_head_t;

/* Byte offset of hdr_crc, i.e. the number of header bytes before it. */
#define HAMMER_FIFO_HEAD_CRCOFF	offsetof(struct hammer_fifo_head, hdr_crc)

/*
 * Trailer of a FIFO element (8 bytes).  Its three fields parallel the
 * first three fields of struct hammer_fifo_head.
 * NOTE(review): tail_signature presumably holds HAMMER_TAIL_SIGNATURE
 * -- confirm.
 */
typedef struct hammer_fifo_tail {
	uint16_t tail_signature;
	uint16_t tail_type;
	uint32_t tail_size;	/* aligned size of the whole mess */
} *hammer_fifo_tail_t;

/*
 * Fifo header types.
 *
 * NOTE: 0x8000U part of HAMMER_HEAD_TYPE_PAD can be removed if the HAMMER
 * version ever gets bumped again. It exists only to keep compatibility with
 * older versions.
 */
#define HAMMER_HEAD_TYPE_PAD	(0x0040U | 0x8000U)	/* == 0x8040U */
#define HAMMER_HEAD_TYPE_DUMMY	0x0041U		/* dummy entry w/seqno */
#define HAMMER_HEAD_TYPE_UNDO	0x0043U		/* random UNDO information */
#define HAMMER_HEAD_TYPE_REDO	0x0044U		/* data REDO / fast fsync */

/* Signature values for the FIFO head and tail structures. */
#define HAMMER_HEAD_SIGNATURE	0xC84EU
#define HAMMER_TAIL_SIGNATURE	0xC74FU

/*
 * Misc FIFO structures.
 *
 * UNDO - Raw meta-data media updates.
 */
typedef struct hammer_fifo_undo {
	struct hammer_fifo_head	head;		/* common FIFO header */
	hammer_off_t		undo_offset;	/* zone-1,2 offset */
	int32_t			undo_data_bytes; /* NOTE(review): presumably */
						 /* length of trailing data */
	int32_t			undo_reserved01;
	/* followed by data */
} *hammer_fifo_undo_t;

/*
 * REDO (HAMMER version 4+) - Logical file writes/truncates.
 *
 * REDOs contain information which will be duplicated in a later meta-data
 * update, allowing fast write()+fsync() operations.  REDOs can be ignored
 * without harming filesystem integrity but must be processed if fsync()
 * semantics are desired.
 *
 * Unlike UNDOs which are processed backwards within the recovery span,
 * REDOs must be processed forwards starting further back (starting outside
 * the recovery span).
 *
 *	WRITE	- Write logical file (with payload).  Executed both
 *		  out-of-span and in-span.  Out-of-span WRITEs may be
 *		  filtered out by TERMs.
 *
 *	TRUNC	- Truncate logical file (no payload).  Executed both
 *		  out-of-span and in-span.  Out-of-span TRUNCs may be
 *		  filtered out by TERMs.
 *
 *	TERM_*	- Indicates meta-data was committed (if out-of-span) or
 *		  will be rolled-back (in-span).  Any out-of-span TERMs
 *		  matching earlier WRITEs remove those WRITEs from
 *		  consideration as they might conflict with a later data
 *		  commit (which is not being rolled-back).
 *
 *	SYNC	- The earliest in-span SYNC (the last one when scanning
 *		  backwards) tells the recovery code how far out-of-span
 *		  it must go to run REDOs.
 *
 * NOTE: WRITEs do not always have matching TERMs even under
 *	 perfect conditions because truncations might remove the
 *	 buffers from consideration.  I/O problems can also remove
 *	 buffers from consideration.
 *
 *	 TRUNCs do not always have matching TERMs because several
 *	 truncations may be aggregated together into a single TERM.
 */
typedef struct hammer_fifo_redo {
	struct hammer_fifo_head	head;		/* common FIFO header */
	int64_t			redo_objid;	/* file being written */
	hammer_off_t		redo_offset;	/* logical offset in file */
	int32_t			redo_data_bytes; /* NOTE(review): presumably */
						 /* length of trailing data */
	uint32_t		redo_flags;	/* NOTE(review): presumably */
						/* HAMMER_REDO_* -- confirm */
	uint32_t		redo_localization;
	uint32_t		redo_reserved01;
	uint64_t		redo_reserved02;
	/* followed by data */
} *hammer_fifo_redo_t;

#define HAMMER_REDO_WRITE	0x00000001
#define HAMMER_REDO_TRUNC	0x00000002
#define HAMMER_REDO_TERM_WRITE	0x00000004
#define HAMMER_REDO_TERM_TRUNC	0x00000008
#define HAMMER_REDO_SYNC	0x00000010

/*
 * Overlay of the FIFO record layouts: the bare header, an UNDO record
 * and a REDO record all share the same storage.
 */
union hammer_fifo_any {
	struct hammer_fifo_head	head;
	struct hammer_fifo_undo	undo;
	struct hammer_fifo_redo	redo;
};

typedef union hammer_fifo_any *hammer_fifo_any_t;

/*
 * Volume header types
 *
 * HAMMER_FSBUF_VOLUME is the value vol_signature holds in a valid volume
 * header.  HAMMER_FSBUF_VOLUME_REV is the same eight bytes in reversed
 * order (C8 41 4D 4D C5 52 30 31 <-> 31 30 52 C5 4D 4D 41 C8).
 */
#define HAMMER_FSBUF_VOLUME	0xC8414D4DC5523031ULL	/* HAMMER01 */
#define HAMMER_FSBUF_VOLUME_REV	0x313052C54D4D41C8ULL	/* (reverse endian) */

/*
 * HAMMER Volume header
 *
 * A HAMMER filesystem can be built from 1-256 block devices, each block
 * device contains a volume header followed by however many buffers fit
 * into the volume.
 *
 * One of the volumes making up a HAMMER filesystem is the root volume.
 * The root volume is always volume #0 which is the first block device path
 * specified by newfs_hammer(8).  All HAMMER volumes have a volume header,
 * however the root volume may be the only volume that has valid values for
 * some fields in the header.
 *
 * Special field notes:
 *
 *	vol_bot_beg - offset of boot area (mem_beg - bot_beg bytes)
 *	vol_mem_beg - offset of memory log (buf_beg - mem_beg bytes)
 *	vol_buf_beg - offset of the first buffer in volume
 *	vol_buf_end - offset of volume EOF (on buffer boundary)
 *
 *	The memory log area allows a kernel to cache new records and data
 *	in memory without allocating space in the actual filesystem to hold
 *	the records and data.  In the event that a filesystem becomes full,
 *	any records remaining in memory can be flushed to the memory log
 *	area.  This allows the kernel to immediately return success.
 *
 *	The buffer offset is a physical offset of zone-2 offset. The lower
 *	52 bits of the zone-2 offset is added to the buffer offset of each
 *	volume to generate an actual I/O offset within the block device.
 *
 *	NOTE: boot area and memory log are currently not used.
 */

/*
 * Filesystem type string
 */
#define HAMMER_FSTYPE_STRING		"DragonFly HAMMER"

/*
 * These macros are only used by userspace when userspace commands either
 * initialize or add a new HAMMER volume.
 *
 * NOTE(review): BOOT_* / MEM_* presumably bound the size of the boot area
 * and memory log (MIN / NOM / MAX) -- confirm against the userspace tools.
 */
#define HAMMER_BOOT_MINBYTES		(32*1024)		/* 32 KiB */
#define HAMMER_BOOT_NOMBYTES		(64LL*1024*1024)	/* 64 MiB */
#define HAMMER_BOOT_MAXBYTES		(256LL*1024*1024)	/* 256 MiB */

#define HAMMER_MEM_MINBYTES		(256*1024)		/* 256 KiB */
#define HAMMER_MEM_NOMBYTES		(1LL*1024*1024*1024)	/* 1 GiB */
#define HAMMER_MEM_MAXBYTES		(64LL*1024*1024*1024)	/* 64 GiB */

/*
 * On-disk volume header.  Field order and widths define the on-media
 * layout and must not be changed.
 */
typedef struct hammer_volume_ondisk {
	uint64_t vol_signature;	/* HAMMER_FSBUF_VOLUME for a valid header */

	/*
	 * These are relative to block device offset, not zone offsets.
	 */
	int64_t vol_bot_beg;	/* offset of boot area */
	int64_t vol_mem_beg;	/* offset of memory log */
	int64_t vol_buf_beg;	/* offset of the first buffer in volume */
	int64_t vol_buf_end;	/* offset of volume EOF (on buffer boundary) */
	int64_t vol_reserved01;

	hammer_uuid_t vol_fsid;	/* identify filesystem */
	hammer_uuid_t vol_fstype; /* identify filesystem type */
	char vol_label[64];	/* filesystem label */

	int32_t vol_no;		/* volume number within filesystem */
	int32_t vol_count;	/* number of volumes making up filesystem */

	uint32_t vol_version;	/* version control information */
	/*
	 * HAMMER_VOL_CRCSIZE1 / HAMMER_VOL_CRCSIZE2 measure the bytes that
	 * precede and follow vol_crc respectively.
	 */
	hammer_crc_t vol_crc;	/* header crc */
	uint32_t vol_flags;	/* volume flags */
	uint32_t vol_rootvol;	/* the root volume number (must be 0) */

	uint32_t vol_reserved[8];

	/*
	 * These fields are initialized and space is reserved in every
	 * volume making up a HAMMER filesystem, but only the root volume
	 * contains valid data.  Note that vol0_stat_bigblocks does not
	 * include big-blocks for freemap and undomap initially allocated
	 * by newfs_hammer(8).
	 */
	int64_t vol0_stat_bigblocks;	/* total big-blocks when fs is empty */
	int64_t vol0_stat_freebigblocks;/* number of free big-blocks */
	int64_t	vol0_reserved01;
	int64_t vol0_stat_inodes;	/* for statfs only */
	int64_t vol0_reserved02;
	hammer_off_t vol0_btree_root;	/* B-Tree root offset in zone-8 */
	hammer_tid_t vol0_next_tid;	/* highest partially synchronized TID */
	hammer_off_t vol0_reserved03;

	/*
	 * Blockmaps for zones.  Not all zones use a blockmap.  Note that
	 * the entire root blockmap is cached in the hammer_mount structure.
	 */
	struct hammer_blockmap	vol0_blockmap[HAMMER_MAX_ZONES];

	/*
	 * Array of zone-2 addresses for undo FIFO.
	 * Indexed by HAMMER_UNDO_INDEX() in hammer_xlate_to_undo().
	 */
	hammer_off_t		vol0_undo_array[HAMMER_MAX_UNDO_BIGBLOCKS];
} *hammer_volume_ondisk_t;

#define HAMMER_ROOT_VOLNO		0	/* root volume is volume #0 */

#define HAMMER_VOLF_NEEDFLUSH		0x0004	/* volume needs flush */

/*
 * Sizes of the two volume-header regions on either side of vol_crc:
 * CRCSIZE1 is the byte count from the start of the header up to vol_crc,
 * CRCSIZE2 is the byte count from just after vol_crc to the end of the
 * structure.
 */
#define HAMMER_VOL_CRCSIZE1	\
	offsetof(struct hammer_volume_ondisk, vol_crc)
#define HAMMER_VOL_CRCSIZE2	\
	(sizeof(struct hammer_volume_ondisk) - HAMMER_VOL_CRCSIZE1 -	\
	 sizeof(hammer_crc_t))

/*
 * Supported version range.  WIP is one greater than MAX.
 */
#define HAMMER_VOL_VERSION_MIN		1	/* minimum supported version */
#define HAMMER_VOL_VERSION_DEFAULT	7	/* newfs default version */
#define HAMMER_VOL_VERSION_WIP		8	/* version >= this is WIP */
#define HAMMER_VOL_VERSION_MAX		7	/* maximum supported version */

/*
 * Symbolic names for each individual version number.
 */
#define HAMMER_VOL_VERSION_ONE		1
#define HAMMER_VOL_VERSION_TWO		2	/* new dirent layout (2.3+) */
#define HAMMER_VOL_VERSION_THREE	3	/* new snapshot layout (2.5+) */
#define HAMMER_VOL_VERSION_FOUR		4	/* new undo/flush (2.5+) */
#define HAMMER_VOL_VERSION_FIVE		5	/* dedup (2.9+) */
#define HAMMER_VOL_VERSION_SIX		6	/* DIRHASH_ALG1 */
#define HAMMER_VOL_VERSION_SEVEN	7	/* use the faster iscsi_crc */

/*
 * Zone-2 address -> physical address: the short-encoded part of the
 * zone-2 offset is added to the volume's vol_buf_beg.
 */
#define hammer_xlate_to_phys(volume, zone2_offset)			\
	(HAMMER_OFF_SHORT_ENCODE(zone2_offset) + (volume)->vol_buf_beg)

/*
 * Translate a zone-3 address to zone-2 address
 *
 * HAMMER_UNDO_INDEX() yields the vol0_undo_array[] slot for a zone-3
 * offset: its short-encoded value divided by the big-block size.
 *
 * hammer_xlate_to_undo() adds the offset's position inside the big-block
 * (offset & HAMMER_BIGBLOCK_MASK64) to the zone-2 base stored in that slot.
 *
 * NOTE: zone3_offset is expanded twice; avoid arguments with side effects.
 *	 The argument is fully parenthesized so that expressions such as
 *	 (a | b) are masked as a whole.
 */
#define HAMMER_UNDO_INDEX(zone3_offset)			\
	(HAMMER_OFF_SHORT_ENCODE(zone3_offset) / HAMMER_BIGBLOCK_SIZE)

#define hammer_xlate_to_undo(volume, zone3_offset)			\
	((volume)->vol0_undo_array[HAMMER_UNDO_INDEX(zone3_offset)] +	\
	 ((zone3_offset) & HAMMER_BIGBLOCK_MASK64))

/*
 * Per-volume buffer span in bytes (vol_buf_end minus vol_buf_beg), i.e.
 * the effective filesystem capacity including big-blocks for layer1/2.
 */
#define HAMMER_VOL_BUF_SIZE(volume)	\
	((volume)->vol_buf_end -	\
	 (volume)->vol_buf_beg)

/*
 * Record types are fairly straightforward.  The B-Tree includes the record
 * type in its index sort.
 */
#define HAMMER_RECTYPE_UNKNOWN		0x0000
#define HAMMER_RECTYPE_INODE		0x0001	/* inode in obj_id space */
#define HAMMER_RECTYPE_DATA		0x0010
#define HAMMER_RECTYPE_DIRENTRY		0x0011
#define HAMMER_RECTYPE_DB		0x0012
#define HAMMER_RECTYPE_EXT		0x0013	/* ext attributes */
#define HAMMER_RECTYPE_FIX		0x0014	/* fixed attribute */
#define HAMMER_RECTYPE_PFS		0x0015	/* PFS management */
#define HAMMER_RECTYPE_SNAPSHOT		0x0016	/* Snapshot management */
#define HAMMER_RECTYPE_CONFIG		0x0017	/* hammer cleanup config */
#define HAMMER_RECTYPE_MAX		0xFFFF

/*
 * Range markers within the record type space: ENTRY_START is the first
 * value following HAMMER_RECTYPE_INODE, CLEAN_START is an alias for
 * HAMMER_RECTYPE_EXT.
 */
#define HAMMER_RECTYPE_ENTRY_START	(HAMMER_RECTYPE_INODE + 1)
#define HAMMER_RECTYPE_CLEAN_START	HAMMER_RECTYPE_EXT

#define HAMMER_FIXKEY_SYMLINK		1

/*
 * Filesystem object types.
 */
#define HAMMER_OBJTYPE_UNKNOWN		0	/* never exists on-disk as unknown */
#define HAMMER_OBJTYPE_DIRECTORY	1
#define HAMMER_OBJTYPE_REGFILE		2
#define HAMMER_OBJTYPE_DBFILE		3
#define HAMMER_OBJTYPE_FIFO		4
#define HAMMER_OBJTYPE_CDEV		5
#define HAMMER_OBJTYPE_BDEV		6
#define HAMMER_OBJTYPE_SOFTLINK		7
#define HAMMER_OBJTYPE_PSEUDOFS		8	/* pseudo filesystem obj */
#define HAMMER_OBJTYPE_SOCKET		9

/*
 * HAMMER inode attribute data
 *
 * The data reference for a HAMMER inode points to this structure.  Any
 * modifications to the contents of this structure will result in a
 * replacement operation.
 *
 * parent_obj_id is only valid for directories (which cannot be hard-linked),
 * and specifies the parent directory obj_id.  This field will also be set
 * for non-directory inodes as a recovery aid, but can wind up holding
 * stale information.  However, since object id's are not reused, the worse
 * that happens is that the recovery code is unable to use it.
 * A parent_obj_id of 0 means it's a root inode of root or non-root PFS.
 *
 * NOTE: Future note on directory hardlinks.  We can implement a record type
 * which allows us to point to multiple parent directories.
 */
typedef struct hammer_inode_data {
	uint16_t version;	/* inode data version */
	uint16_t mode;		/* basic unix permissions */
	uint32_t uflags;	/* chflags */
	uint32_t rmajor;	/* used by device nodes */
	uint32_t rminor;	/* used by device nodes */
	uint64_t ctime;
	int64_t parent_obj_id;	/* parent directory obj_id */
	hammer_uuid_t uid;
	hammer_uuid_t gid;

	uint8_t obj_type;	/* presumably HAMMER_OBJTYPE_* -- TODO confirm */
	uint8_t cap_flags;	/* capability support flags (extension) */
	uint16_t reserved01;
	uint32_t reserved02;
	uint64_t nlinks;	/* hard links */
	uint64_t size;		/* filesystem object size */
	union {
		char	symlink[24];	/* HAMMER_INODE_BASESYMLEN */
	} ext;
	/*
	 * HAMMER_INODE_CRCSIZE is offsetof(mtime), so the two trailing
	 * fields below lie outside that size.  Keep them last.
	 */
	uint64_t mtime;	/* mtime must be second-to-last */
	uint64_t atime;	/* atime must be last */
} *hammer_inode_data_t;

/*
 * Neither mtime nor atime updates are CRCd by the B-Tree element.
 * mtime updates have UNDO, atime updates do not.
 *
 * HAMMER_INODE_CRCSIZE is therefore the size of hammer_inode_data up to,
 * but not including, mtime.
 */
#define HAMMER_INODE_CRCSIZE	\
	offsetof(struct hammer_inode_data, mtime)

#define HAMMER_INODE_DATA_VERSION	1
#define HAMMER_OBJID_ROOT		1	/* root inodes # */
#define HAMMER_INODE_BASESYMLEN		24	/* see ext.symlink */

/*
 * Capability & implementation flags.
 *
 * The low two bits (HAMMER_INODE_CAP_DIRHASH_MASK) form a field whose four
 * possible values are HAMMER_INODE_CAP_DIRHASH_ALG0 .. ALG3.
 *
 * HAMMER_INODE_CAP_DIR_LOCAL_INO - Use inode B-Tree localization
 * for directory entries.  Also see HAMMER_DIR_INODE_LOCALIZATION().
 */
#define HAMMER_INODE_CAP_DIRHASH_MASK	0x03	/* directory: hash algorithm */
#define HAMMER_INODE_CAP_DIRHASH_ALG0	0x00
#define HAMMER_INODE_CAP_DIRHASH_ALG1	0x01
#define HAMMER_INODE_CAP_DIRHASH_ALG2	0x02
#define HAMMER_INODE_CAP_DIRHASH_ALG3	0x03
#define HAMMER_INODE_CAP_DIR_LOCAL_INO	0x04	/* use inode localization */

/*
 * Round an offset up to the next multiple of 16.  The _WITH variant casts
 * both the offset and the mask to the caller-supplied integer type first.
 */
#define HAMMER_DATA_DOALIGN(offset)	(((offset) + 15) & ~15)
#define HAMMER_DATA_DOALIGN_WITH(type, offset)	\
	(((type)(offset) + 15) & (~(type)15))

/*
 * A HAMMER directory entry associates a HAMMER filesystem object with a
 * namespace.  It is hooked into a pseudo-filesystem (with its own inode
 * numbering space) in the filesystem by setting the high 16 bits of the
 * localization field.  The low 16 bits must be 0 and are reserved for
 * future use.
 *
 * Directory entries are indexed with a 128 bit namekey rather than an
 * offset.  A portion of the namekey is an iterator/randomizer to deal
 * with collisions.
 *
 * NOTE: leaf.base.obj_type from the related B-Tree leaf entry holds
 * the filesystem object type of obj_id, e.g. a den_type equivalent.
 * It is not stored in hammer_direntry_data.
 *
 * NOTE: name field / the filename data reference is NOT terminated with \0.
 */
struct hammer_direntry_data {
	int64_t		obj_id;		/* object this entry refers to */
	uint32_t	localization;	/* selects the pseudo-filesystem */
	uint32_t	reserved01;
	char		name[16];	/* name (extended) */
};

typedef struct hammer_direntry_data *hammer_direntry_data_t;

/* Byte offset of the name field, and entry size for a name of nlen bytes. */
#define HAMMER_ENTRY_NAME_OFF	\
	offsetof(struct hammer_direntry_data, name)
#define HAMMER_ENTRY_SIZE(nlen)	\
	offsetof(struct hammer_direntry_data, name[nlen])

/*
 * Symlink targets too long for the inode's embedded area live in their
 * own FIX type record, laid out as follows.
 */
struct hammer_symlink_data {
	char	name[16];		/* name (extended) */
};

typedef struct hammer_symlink_data *hammer_symlink_data_t;

#define HAMMER_SYMLINK_NAME_OFF	\
	offsetof(struct hammer_symlink_data, name)

/*
 * The root inode for the primary filesystem and root inode for any
 * pseudo-fs may be tagged with an optional data structure using
 * HAMMER_RECTYPE_PFS and localization id.  This structure allows
 * the node to be used as a mirroring master or slave.
 *
 * When operating as a slave CD's into the node automatically become read-only
 * and as-of sync_end_tid.
 *
 * When operating as a master the read PFSD info sets sync_end_tid to
 * the most recently flushed TID.
 *
 * sync_low_tid is not yet used but will represent the highest pruning
 * end-point, after which full history is available.
 *
 * We need to pack this structure making it equally sized on both 32-bit and
 * 64-bit machines as it is part of struct hammer_ioc_mrecord_pfs which is
 * sent over the wire in hammer mirror operations. Only on 64-bit machines
 * the size of this struct differs when packed or not. This leads us to the
 * situation where old 64-bit systems (using the non-packed structure),
 * which were never able to mirror to/from 32-bit systems, are now no longer
 * able to mirror to/from newer 64-bit systems (using the packed structure).
 */
/*
 * Packed PFS record.  Field order and widths are part of the on-disk and
 * mirroring wire format and must not be changed.
 */
struct hammer_pseudofs_data {
	hammer_tid_t	sync_low_tid;	/* full history beyond this point */
	hammer_tid_t	sync_beg_tid;	/* earliest tid w/ full history avail */
	hammer_tid_t	sync_end_tid;	/* current synchronization point */
	uint64_t	sync_beg_ts;	/* real-time of last completed sync */
	uint64_t	sync_end_ts;	/* initiation of current sync cycle */
	hammer_uuid_t	shared_uuid;	/* shared uuid (match required) */
	hammer_uuid_t	unique_uuid;	/* unique uuid of this master/slave */
	int32_t		reserved01;	/* reserved for future master_id */
	int32_t		mirror_flags;	/* misc flags (HAMMER_PFSD_*) */
	char		label[64];	/* filesystem space label */
	char		snapshots[64];	/* softlink dir for pruning */
	int32_t		reserved02;	/* was prune_{time,freq} */
	int32_t		reserved03;	/* was reblock_{time,freq} */
	int32_t		reserved04;	/* was snapshot_freq */
	int32_t		prune_min;	/* do not prune recent history */
	int32_t		prune_max;	/* do not retain history beyond here */
	int32_t		reserved[16];
} __packed;

typedef struct hammer_pseudofs_data *hammer_pseudofs_data_t;

/* Bits tested in the mirror_flags field of a PFS record. */
#define HAMMER_PFSD_SLAVE	0x00000001
#define HAMMER_PFSD_DELETED	0x80000000

/*
 * Predicates on a PFS record pointer; each evaluates to 0 or 1.
 * A PFS is a master exactly when its SLAVE bit is clear.
 */
#define hammer_is_pfs_slave(pfsd)	\
	(((pfsd)->mirror_flags & HAMMER_PFSD_SLAVE) != 0)
#define hammer_is_pfs_master(pfsd)	\
	(((pfsd)->mirror_flags & HAMMER_PFSD_SLAVE) == 0)
#define hammer_is_pfs_deleted(pfsd)	\
	(((pfsd)->mirror_flags & HAMMER_PFSD_DELETED) != 0)

/* PFS id space: ids run from HAMMER_ROOT_PFSID to HAMMER_MAX_PFSID. */
#define HAMMER_MAX_PFS		65536
#define HAMMER_MAX_PFSID	(HAMMER_MAX_PFS - 1)
#define HAMMER_ROOT_PFSID	0

/*
 * Snapshot meta-data { Objid = HAMMER_OBJID_ROOT, Key = tid, rectype = SNAPSHOT }.
 *
 * Snapshot records replace the old <fs>/snapshots/<softlink> methodology.  Snapshot
 * records are mirrored but may be independently managed once they are laid down on
 * a slave.
 *
 * NOTE: The b-tree key is signed, the tid is not, so callers must still sort the
 *	 results.
 *
 * NOTE: Reserved fields must be zero (as usual)
 */
struct hammer_snapshot_data {
	hammer_tid_t	tid;		/* snapshot TID, equal to the key */
	uint64_t	ts;		/* real-time at which it was made */
	uint64_t	reserved01;
	uint64_t	reserved02;
	char		label[64];	/* description supplied by the user */
	uint64_t	reserved03[4];
};

typedef struct hammer_snapshot_data *hammer_snapshot_data_t;

/*
 * Config meta-data { ObjId = HAMMER_OBJID_ROOT, Key = 0, rectype = CONFIG }.
 *
 * Holds the hammer cleanup configuration text.  Not mirrored.
 */
struct hammer_config_data {
	char		text[1024];
};

typedef struct hammer_config_data *hammer_config_data_t;

/*
 * Union of the structures that can appear as record data.
 */
union hammer_data_ondisk {
	struct hammer_direntry_data	entry;
	struct hammer_inode_data	inode;
	struct hammer_symlink_data	symlink;
	struct hammer_pseudofs_data	pfsd;
	struct hammer_snapshot_data	snap;
	struct hammer_config_data	config;
};

typedef union hammer_data_ondisk *hammer_data_ondisk_t;

/*
 * Ondisk layout of B-Tree related structures
 */
#if 0	 /* Not needed for fstype(8) */
#include "hammer_btree.h"
#endif

/*
 * Pick the localization for a directory's entries from its inode data:
 * HAMMER_LOCALIZE_INODE when HAMMER_INODE_CAP_DIR_LOCAL_INO is set in
 * cap_flags, HAMMER_LOCALIZE_MISC otherwise.
 */
#define HAMMER_DIR_INODE_LOCALIZATION(ino_data)				\
	((((ino_data)->cap_flags & HAMMER_INODE_CAP_DIR_LOCAL_INO) == 0) ? \
	 HAMMER_LOCALIZE_MISC :						\
	 HAMMER_LOCALIZE_INODE)

#endif /* !VFS_HAMMER_DISK_H_ */