MySQL 5.6.14 Source Code Document
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
page0zip.cc
Go to the documentation of this file.
1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2012, Facebook Inc.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************/
27 #include <map>
28 using namespace std;
29 
30 #define THIS_MODULE
31 #include "page0zip.h"
32 #ifdef UNIV_NONINL
33 # include "page0zip.ic"
34 #endif
35 #undef THIS_MODULE
36 #include "page0page.h"
37 #include "mtr0log.h"
38 #include "ut0sort.h"
39 #include "dict0dict.h"
40 #include "btr0cur.h"
41 #include "page0types.h"
42 #include "log0recv.h"
43 #include "zlib.h"
44 #ifndef UNIV_HOTBACKUP
45 # include "buf0buf.h"
46 # include "buf0lru.h"
47 # include "btr0sea.h"
48 # include "dict0boot.h"
49 # include "lock0lock.h"
50 # include "srv0mon.h"
51 # include "srv0srv.h"
52 # include "ut0crc32.h"
53 #else /* !UNIV_HOTBACKUP */
54 # include "buf0checksum.h"
55 # define lock_move_reorganize_page(block, temp_block) ((void) 0)
56 # define buf_LRU_stat_inc_unzip() ((void) 0)
57 #endif /* !UNIV_HOTBACKUP */
58 
59 #ifndef UNIV_HOTBACKUP
60 
66 #ifdef HAVE_PSI_INTERFACE
67 UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
68 #endif /* HAVE_PSI_INTERFACE */
69 #endif /* !UNIV_HOTBACKUP */
70 
/* Global tunables for compressed (ROW_FORMAT=COMPRESSED) page handling. */
71 /* Compression level to be used by zlib. Settable by user. */
72 UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL;
73 
74 /* Whether or not to log compressed page images to avoid possible
75 compression algorithm changes in zlib. */
/* When TRUE the whole compressed page image goes to the redo log
(see page_zip_compress_write_log() below), so crash recovery does not
depend on zlib reproducing bit-identical output. */
76 UNIV_INTERN my_bool page_zip_log_pages = true;
77 
78 /* Please refer to ../include/page0zip.ic for a description of the
79 compressed page format. */
80 
81 /* The infimum and supremum records are omitted from the compressed page.
82 On compress, we compare that the records are there, and on uncompress we
83 restore the records. */
/* Canonical byte images of the parts of the infimum and supremum
records that are omitted from the compressed page.  On compress these
are compared against the page; on decompress they are written back. */
85 static const byte infimum_extra[] = {
86  0x01, /* info_bits=0, n_owned=1 */
87  0x00, 0x02 /* heap_no=0, status=2 */
88  /* ?, ? */ /* next=(first user rec, or supremum) */
89 };
/* The fixed data payload of the infimum record. */
91 static const byte infimum_data[] = {
92  0x69, 0x6e, 0x66, 0x69,
93  0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
94 };
/* Extra bytes and data of the supremum record, minus the first byte
(info_bits/n_owned), which varies and is checked separately. */
96 static const byte supremum_extra_data[] = {
97  /* 0x0?, */ /* info_bits=0, n_owned=1..8 */
98  0x00, 0x0b, /* heap_no=1, status=3 */
99  0x00, 0x00, /* next=0 */
100  0x73, 0x75, 0x70, 0x72,
101  0x65, 0x6d, 0x75, 0x6d /* "supremum" */
102 };
103 
/* Debug assertion: the first min(s, sizeof field_ref_zero) bytes at b
are zero.  Compiles to nothing unless UNIV_DEBUG assertions are on. */
108 #define ASSERT_ZERO(b, s) \
109  ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero)))
110 
/* Debug assertion: a whole BLOB pointer (field reference) at b is zero. */
112 #define ASSERT_ZERO_BLOB(b) \
113  ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero))
114 
115 /* Enable some extra debugging output. This code can be enabled
116 independently of any UNIV_ debugging conditions. */
117 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
118 # include <stdarg.h>
119 __attribute__((format (printf, 1, 2)))
120 /**********************************************************************/
123 static
124 int
125 page_zip_fail_func(
126 /*===============*/
127  const char* fmt,
128  ...)
129 {
130  int res;
131  va_list ap;
132 
133  ut_print_timestamp(stderr);
134  fputs(" InnoDB: ", stderr);
135  va_start(ap, fmt);
136  res = vfprintf(stderr, fmt, ap);
137  va_end(ap);
138 
139  return(res);
140 }
143 # define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
144 #else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
145 
147 # define page_zip_fail(fmt_args) /* empty */
148 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
149 
150 #ifndef UNIV_HOTBACKUP
151 /**********************************************************************/
/* Compute how many bytes of record payload fit on an empty compressed
page of the given size.  NOTE(review): the function-name line was lost
in this extracted listing; judging by the signature and page0zip.h this
is page_zip_empty_size() — confirm against the original source. */
154 UNIV_INTERN
155 ulint
157 /*================*/
158  ulint n_fields,
159  ulint zip_size)
160 {
/* Signed arithmetic on purpose: the subtraction below may go
negative for small zip sizes; the result is clamped to 0 on return. */
161  lint size = zip_size
162  /* subtract the page header and the longest
163  uncompressed data needed for one record */
164  - (PAGE_DATA
165  + PAGE_ZIP_DIR_SLOT_SIZE
166  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
167  + 1/* encoded heap_no==2 in page_zip_write_rec() */
168  + 1/* end of modification log */
169  - REC_N_NEW_EXTRA_BYTES/* omitted bytes */)
170  /* subtract the space for page_zip_fields_encode() */
171  - compressBound(2 * (n_fields + 1));
172  return(size > 0 ? (ulint) size : 0);
173 }
174 #endif /* !UNIV_HOTBACKUP */
175 
176 /*************************************************************/
/* Number of entries in the dense page directory (user records plus
free-list records, excluding infimum/supremum).  NOTE(review): the
name line was lost in extraction; the macros below invoke this as
page_zip_dir_elems(). */
180 UNIV_INLINE
181 ulint
183 /*===============*/
184  const page_zip_des_t* page_zip)
185 {
186  /* Exclude the page infimum and supremum from the record count. */
187  return(page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW);
188 }
189 
190 /*************************************************************/
/* Size in bytes of the whole dense page directory (all slots).
NOTE(review): name line lost in extraction; called below as
page_zip_dir_size(). */
194 UNIV_INLINE
195 ulint
197 /*==============*/
198  const page_zip_des_t* page_zip)
199 {
200  return(PAGE_ZIP_DIR_SLOT_SIZE * page_zip_dir_elems(page_zip))
202 
203 /*************************************************************/
/* Byte offset, within the compressed image, where the dense page
directory starts: the directory grows downward from the end of the
compressed page.  NOTE(review): name line lost in extraction; the
page_zip_dir_start_low() macro below calls this as
page_zip_dir_start_offs(). */
207 UNIV_INLINE
208 ulint
210 /*====================*/
211  const page_zip_des_t* page_zip,
212  ulint n_dense)
213 {
214  ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));
215 
216  return(page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
217 }
218 
219 /*************************************************************/
/* Pointer to the start of the dense directory, given a caller-supplied
element count n_dense. */
225 #define page_zip_dir_start_low(page_zip, n_dense) \
226  ((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
227 /*************************************************************/
/* Pointer to the start of the dense directory, deriving the element
count from the page header itself. */
232 #define page_zip_dir_start(page_zip) \
233  page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
234 
235 /*************************************************************/
/* Size in bytes of the portion of the dense directory that describes
records on the page list (PAGE_N_RECS user records); the remainder of
the directory describes the free list.  NOTE(review): name line lost in
extraction; called below as page_zip_dir_user_size(). */
239 UNIV_INLINE
240 ulint
242 /*===================*/
243  const page_zip_des_t* page_zip)
244 {
245  ulint size = PAGE_ZIP_DIR_SLOT_SIZE
246  * page_get_n_recs(page_zip->data);
247  ut_ad(size <= page_zip_dir_size(page_zip));
248  return(size);
249 }
250 
251 /*************************************************************/
/* Linear scan of a range of dense-directory slots for one whose masked
record offset equals the argument.  Returns the slot pointer, or NULL
if the offset does not occur in [slot, end).  NOTE(review): name line
lost in extraction; called below as page_zip_dir_find_low(). */
254 UNIV_INLINE
255 byte*
257 /*==================*/
258  byte* slot,
259  byte* end,
260  ulint offset)
261 {
262  ut_ad(slot <= end);
263 
264  for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
/* The slot's low bits hold the offset; the high bits carry the
owned/deleted flags, which are masked off before comparing. */
265  if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK)
266  == offset) {
267  return(slot);
268  }
269  }
270 
271  return(NULL);
272 }
273 
274 /*************************************************************/
/* Find the dense-directory slot of a record that is on the page list
(i.e. among the first PAGE_N_RECS slots, which sit at the very end of
the compressed page).  Returns NULL if not found.  NOTE(review): name
line lost in extraction; presumably page_zip_dir_find(). */
277 UNIV_INLINE
278 byte*
280 /*==============*/
281  page_zip_des_t* page_zip,
282  ulint offset)
283 {
284  byte* end = page_zip->data + page_zip_get_size(page_zip);
285 
286  ut_ad(page_zip_simple_validate(page_zip));
287 
/* User-record slots occupy the last page_zip_dir_user_size() bytes. */
288  return(page_zip_dir_find_low(end - page_zip_dir_user_size(page_zip),
289  end,
290  offset));
291 }
292 
293 /*************************************************************/
/* Find the dense-directory slot of a record on the free list: the
slots between the directory start and the user-record slots.  Returns
NULL if not found.  NOTE(review): name line lost in extraction;
presumably page_zip_dir_find_free(). */
296 UNIV_INLINE
297 byte*
299 /*===================*/
300  page_zip_des_t* page_zip,
301  ulint offset)
302 {
303  byte* end = page_zip->data + page_zip_get_size(page_zip);
304 
305  ut_ad(page_zip_simple_validate(page_zip));
306 
/* Free-list slots lie between the full directory size and the
user-record portion at the tail. */
307  return(page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
308  end - page_zip_dir_user_size(page_zip),
309  offset));
310 }
311 
312 /*************************************************************/
/* Read dense-directory entry number `slot` (0 = slot nearest the end
of the page).  The returned 16-bit value contains the record offset in
the low bits plus the owned/deleted flag bits.  NOTE(review): name line
lost in extraction; called elsewhere as page_zip_dir_get(). */
316 UNIV_INLINE
317 ulint
319 /*=============*/
320  const page_zip_des_t* page_zip,
321  ulint slot)
323 {
324  ut_ad(page_zip_simple_validate(page_zip));
325  ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
/* Slots are stored back-to-front from the end of the page. */
326  return(mach_read_from_2(page_zip->data + page_zip_get_size(page_zip)
327  - PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
328 }
329 
330 #ifndef UNIV_HOTBACKUP
331 /**********************************************************************/
/* Write the entire compressed page image to the mini-transaction redo
log, so that recovery can restore the page without re-running zlib.
NOTE(review): the extraction lost original line 339 — the
`dict_index_t* index` parameter used below — and line 375, the redo
record type argument (MLOG_ZIP_PAGE_COMPRESS) of
mlog_write_initial_log_record_fast(); confirm against upstream. */
333 static
334 void
335 page_zip_compress_write_log(
336 /*========================*/
337  const page_zip_des_t* page_zip,
338  const page_t* page,
340  mtr_t* mtr)
341 {
342  byte* log_ptr;
343  ulint trailer_size;
344 
345  ut_ad(!dict_index_is_ibuf(index));
346 
347  log_ptr = mlog_open(mtr, 11 + 2 + 2);
348 
/* mlog_open() returns NULL when logging is disabled for this mtr;
in that case there is nothing to do. */
349  if (!log_ptr) {
350 
351  return;
352  }
353 
354  /* Read the number of user records. */
355  trailer_size = page_dir_get_n_heap(page_zip->data)
356  - PAGE_HEAP_NO_USER_LOW;
357  /* Multiply by uncompressed of size stored per record */
358  if (!page_is_leaf(page)) {
359  trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
360  } else if (dict_index_is_clust(index)) {
361  trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
362  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
363  } else {
364  trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
365  }
366  /* Add the space occupied by BLOB pointers. */
367  trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
368  ut_a(page_zip->m_end > PAGE_DATA);
369 #if FIL_PAGE_DATA > PAGE_DATA
370 # error "FIL_PAGE_DATA > PAGE_DATA"
371 #endif
372  ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
373 
374  log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
376  log_ptr, mtr);
/* Log the length of the compressed stream + modification log,
then the length of the uncompressed trailer. */
377  mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
378  log_ptr += 2;
379  mach_write_to_2(log_ptr, trailer_size);
380  log_ptr += 2;
381  mlog_close(mtr, log_ptr);
382 
383  /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
384  mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
385  mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
386  /* Write most of the page header, the compressed stream and
387  the modification log. */
388  mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
389  page_zip->m_end - FIL_PAGE_TYPE);
390  /* Write the uncompressed trailer of the compressed page. */
391  mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
392  - trailer_size, trailer_size);
393 }
394 #endif /* !UNIV_HOTBACKUP */
395 
396 /******************************************************/
/* Count the externally stored (BLOB) columns in all records that
precede `rec` in heap-number order on this clustered-index leaf page.
Used to locate rec's BLOB pointers in the trailer, where they are
stored in heap order. */
399 static
400 ulint
401 page_zip_get_n_prev_extern(
402 /*=======================*/
403  const page_zip_des_t* page_zip,
405  const rec_t* rec,
407  const dict_index_t* index)
408 {
409  const page_t* page = page_align(rec);
410  ulint n_ext = 0;
411  ulint i;
412  ulint left;
413  ulint heap_no;
414  ulint n_recs = page_get_n_recs(page_zip->data);
415 
416  ut_ad(page_is_leaf(page));
417  ut_ad(page_is_comp(page));
418  ut_ad(dict_table_is_comp(index->table));
419  ut_ad(dict_index_is_clust(index));
420  ut_ad(!dict_index_is_ibuf(index));
421 
422  heap_no = rec_get_heap_no_new(rec);
423  ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
/* `left` = number of user records with a smaller heap number; once
we have visited them all we can stop scanning the directory. */
424  left = heap_no - PAGE_HEAP_NO_USER_LOW;
425  if (UNIV_UNLIKELY(!left)) {
426  return(0);
427  }
428 
429  for (i = 0; i < n_recs; i++) {
430  const rec_t* r = page + (page_zip_dir_get(page_zip, i)
431  & PAGE_ZIP_DIR_SLOT_MASK);
432 
433  if (rec_get_heap_no_new(r) < heap_no) {
434  n_ext += rec_get_n_extern_new(r, index,
435  ULINT_UNDEFINED);
436  if (!--left) {
437  break;
438  }
439  }
440  }
441 
442  return(n_ext);
443 }
444 
445 /**********************************************************************/
448 static
449 byte*
450 page_zip_fixed_field_encode(
451 /*========================*/
452  byte* buf,
453  ulint val)
454 {
455  ut_ad(val >= 2);
456 
457  if (UNIV_LIKELY(val < 126)) {
458  /*
459  0 = nullable variable field of at most 255 bytes length;
460  1 = not null variable field of at most 255 bytes length;
461  126 = nullable variable field with maximum length >255;
462  127 = not null variable field with maximum length >255
463  */
464  *buf++ = (byte) val;
465  } else {
466  *buf++ = (byte) (0x80 | val >> 8);
467  *buf++ = (byte) val;
468  }
469 
470  return(buf);
471 }
472 
473 /**********************************************************************/
/* Serialize a description of the first n index fields into buf, for
embedding at the start of the compressed page: one or two bytes per
column (see page_zip_fixed_field_encode()), with runs of consecutive
non-nullable fixed-length columns coalesced into a single length sum.
Finally writes either the trx_id column position (clustered index) or
the number of nullable fields (secondary index / node pointers).
Returns the number of bytes written.
NOTE(review): original line 532 — the rest of the condition at line
531, by upstream `> DICT_MAX_FIXED_COL_LEN)) {` — was lost in
extraction; confirm against the original source. */
476 static
477 ulint
478 page_zip_fields_encode(
479 /*===================*/
480  ulint n,
481  dict_index_t* index,
482  ulint trx_id_pos,
485  byte* buf)
486 {
487  const byte* buf_start = buf;
488  ulint i;
489  ulint col;
490  ulint trx_id_col = 0;
491  /* sum of lengths of preceding non-nullable fixed fields, or 0 */
492  ulint fixed_sum = 0;
493 
494  ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
495 
496  for (i = col = 0; i < n; i++) {
497  dict_field_t* field = dict_index_get_nth_field(index, i);
498  ulint val;
499 
500  if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
501  val = 1; /* set the "not nullable" flag */
502  } else {
503  val = 0; /* nullable field */
504  }
505 
506  if (!field->fixed_len) {
507  /* variable-length field */
508  const dict_col_t* column
509  = dict_field_get_col(field);
510 
511  if (UNIV_UNLIKELY(column->len > 255)
512  || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
513  val |= 0x7e; /* max > 255 bytes */
514  }
515 
516  if (fixed_sum) {
517  /* write out the length of any
518  preceding non-nullable fields */
519  buf = page_zip_fixed_field_encode(
520  buf, fixed_sum << 1 | 1);
521  fixed_sum = 0;
522  col++;
523  }
524 
525  *buf++ = (byte) val;
526  col++;
527  } else if (val) {
528  /* fixed-length non-nullable field */
529 
530  if (fixed_sum && UNIV_UNLIKELY
531  (fixed_sum + field->fixed_len
/* NOTE(review): extraction gap — the comparison bound and the
opening brace of this if-block were on the lost line 532. */
533  /* Write out the length of the
534  preceding non-nullable fields,
535  to avoid exceeding the maximum
536  length of a fixed-length column. */
537  buf = page_zip_fixed_field_encode(
538  buf, fixed_sum << 1 | 1);
539  fixed_sum = 0;
540  col++;
541  }
542 
543  if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
544  if (fixed_sum) {
545  /* Write out the length of any
546  preceding non-nullable fields,
547  and start a new trx_id column. */
548  buf = page_zip_fixed_field_encode(
549  buf, fixed_sum << 1 | 1);
550  col++;
551  }
552 
553  trx_id_col = col;
554  fixed_sum = field->fixed_len;
555  } else {
556  /* add to the sum */
557  fixed_sum += field->fixed_len;
558  }
559  } else {
560  /* fixed-length nullable field */
561 
562  if (fixed_sum) {
563  /* write out the length of any
564  preceding non-nullable fields */
565  buf = page_zip_fixed_field_encode(
566  buf, fixed_sum << 1 | 1);
567  fixed_sum = 0;
568  col++;
569  }
570 
/* Low bit 0 marks a nullable fixed-length column. */
571  buf = page_zip_fixed_field_encode(
572  buf, field->fixed_len << 1);
573  col++;
574  }
575  }
576 
577  if (fixed_sum) {
578  /* Write out the lengths of last fixed-length columns. */
579  buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
580  }
581 
582  if (trx_id_pos != ULINT_UNDEFINED) {
583  /* Write out the position of the trx_id column */
584  i = trx_id_col;
585  } else {
586  /* Write out the number of nullable fields */
587  i = index->n_nullable;
588  }
589 
/* Same one-or-two-byte variable encoding as the column codes. */
590  if (i < 128) {
591  *buf++ = (byte) i;
592  } else {
593  *buf++ = (byte) (0x80 | i >> 8);
594  *buf++ = (byte) i;
595  }
596 
597  ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
598  return((ulint) (buf - buf_start));
599 }
600 
601 /**********************************************************************/
/* Populate the dense page directory, which grows downward from buf:
first the records on the page list in collation order, then the
free-list (deleted) records.  Each 2-byte slot stores the record
offset plus the owned/deleted flag bits.  When recs is non-NULL it is
also filled so that recs[heap_no - 2] points at each record.
NOTE(review): the extraction lost the parameter documentation lines
(609, 611-612) between `buf` and `recs`; confirm signature upstream. */
603 static
604 void
605 page_zip_dir_encode(
606 /*================*/
607  const page_t* page,
608  byte* buf,
610  const rec_t** recs)
613 {
614  const byte* rec;
615  ulint status;
616  ulint min_mark;
617  ulint heap_no;
618  ulint i;
619  ulint n_heap;
620  ulint offs;
621 
622  min_mark = 0;
623 
624  if (page_is_leaf(page)) {
625  status = REC_STATUS_ORDINARY;
626  } else {
627  status = REC_STATUS_NODE_PTR;
/* On the leftmost non-leaf page of a level, the first user record
must carry REC_INFO_MIN_REC_FLAG. */
628  if (UNIV_UNLIKELY
629  (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
630  min_mark = REC_INFO_MIN_REC_FLAG;
631  }
632  }
633 
634  n_heap = page_dir_get_n_heap(page);
635 
636  /* Traverse the list of stored records in the collation order,
637  starting from the first user record. */
638 
639  rec = page + PAGE_NEW_INFIMUM;
640 
641  i = 0;
642 
643  for (;;) {
644  ulint info_bits;
645  offs = rec_get_next_offs(rec, TRUE);
646  if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
647  break;
648  }
649  rec = page + offs;
650  heap_no = rec_get_heap_no_new(rec);
651  ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
652  ut_a(heap_no < n_heap);
653  ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
654  ut_a(offs >= PAGE_ZIP_START);
655 #if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
656 # error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
657 #endif
658 #if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE_MAX - 1
659 # error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_SIZE_MAX - 1"
660 #endif
/* Fold the n_owned and deleted flags into the slot's spare bits. */
661  if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
662  offs |= PAGE_ZIP_DIR_SLOT_OWNED;
663  }
664 
665  info_bits = rec_get_info_bits(rec, TRUE);
666  if (info_bits & REC_INFO_DELETED_FLAG) {
667  info_bits &= ~REC_INFO_DELETED_FLAG;
668  offs |= PAGE_ZIP_DIR_SLOT_DEL;
669  }
670  ut_a(info_bits == min_mark);
671  /* Only the smallest user record can have
672  REC_INFO_MIN_REC_FLAG set. */
673  min_mark = 0;
674 
675  mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
676 
677  if (UNIV_LIKELY_NULL(recs)) {
678  /* Ensure that each heap_no occurs at most once. */
679  ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
680  /* exclude infimum and supremum */
681  recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
682  }
683 
684  ut_a(rec_get_status(rec) == status);
685  }
686 
687  offs = page_header_get_field(page, PAGE_FREE);
688 
689  /* Traverse the free list (of deleted records). */
690  while (offs) {
691  ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
692  rec = page + offs;
693 
694  heap_no = rec_get_heap_no_new(rec);
695  ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
696  ut_a(heap_no < n_heap);
697 
698  ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
699  ut_a(rec_get_status(rec) == status);
700 
701  mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
702 
703  if (UNIV_LIKELY_NULL(recs)) {
704  /* Ensure that each heap_no occurs at most once. */
705  ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
706  /* exclude infimum and supremum */
707  recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
708  }
709 
710  offs = rec_get_next_offs(rec, TRUE);
711  }
712 
713  /* Ensure that each heap no occurs at least once. */
714  ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
715 }
716 
717 extern "C" {
718 
719 /**********************************************************************/
721 static
722 void*
723 page_zip_zalloc(
724 /*============*/
725  void* opaque,
726  uInt items,
727  uInt size)
728 {
729  return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size));
730 }
731 
732 /**********************************************************************/
/**********************************************************************/
/* zlib free_func callback: deliberately a no-op.  All memory handed
out by the matching alloc callback lives in a mem_heap_t and is
reclaimed in bulk when that heap is freed. */
static
void
page_zip_free(
/*==========*/
	void*	opaque __attribute__((unused)),	/*!< in: memory heap */
	void*	address __attribute__((unused)))/*!< in: object to free */
{
	/* Intentionally empty: heap memory is freed wholesale. */
}
742 
743 } /* extern "C" */
744 
745 /**********************************************************************/
/* Configure a z_stream to allocate from (and never individually free
into) the given InnoDB memory heap, via the page_zip_zalloc() and
page_zip_free() callbacks above.  NOTE(review): the function-name line
was lost in extraction; per page0zip.h this is page_zip_set_alloc(). */
747 UNIV_INTERN
748 void
750 /*===============*/
751  void* stream,
752  mem_heap_t* heap)
753 {
754  z_stream* strm = static_cast<z_stream*>(stream);
755 
756  strm->zalloc = page_zip_zalloc;
757  strm->zfree = page_zip_free;
758  strm->opaque = heap;
759 }
760 
761 #if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
762 
763 # define PAGE_ZIP_COMPRESS_DBG
764 #endif
765 
766 #ifdef PAGE_ZIP_COMPRESS_DBG
767 
769 UNIV_INTERN ibool page_zip_compress_dbg;
774 UNIV_INTERN unsigned page_zip_compress_log;
775 
776 /**********************************************************************/
779 static
780 int
781 page_zip_compress_deflate(
782 /*======================*/
783  FILE* logfile,
784  z_streamp strm,
785  int flush)
786 {
787  int status;
788  if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
789  ut_print_buf(stderr, strm->next_in, strm->avail_in);
790  }
791  if (UNIV_LIKELY_NULL(logfile)) {
792  fwrite(strm->next_in, 1, strm->avail_in, logfile);
793  }
794  status = deflate(strm, flush);
795  if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
796  fprintf(stderr, " -> %d\n", status);
797  }
798  return(status);
799 }
800 
801 /* Redefine deflate(). */
802 # undef deflate
803 
808 # define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
809 
810 # define FILE_LOGFILE FILE* logfile,
811 
812 # define LOGFILE logfile,
813 #else /* PAGE_ZIP_COMPRESS_DBG */
814 
815 # define FILE_LOGFILE
816 
817 # define LOGFILE
818 #endif /* PAGE_ZIP_COMPRESS_DBG */
819 
820 /**********************************************************************/
/* Compress the records of a non-leaf (node pointer) page.  For each
record the extra bytes and data are deflated, except the final
REC_NODE_PTR_SIZE child-page-number bytes, which are copied in
uncompressed form into `storage`, indexed by heap number.
NOTE(review): original line 827 (the FILE_LOGFILE debug parameter) and
the parameter doc lines 827/830 were lost in extraction. */
823 static
824 int
825 page_zip_compress_node_ptrs(
826 /*========================*/
828  z_stream* c_stream,
829  const rec_t** recs,
831  ulint n_dense,
832  dict_index_t* index,
833  byte* storage,
834  mem_heap_t* heap)
835 {
836  int err = Z_OK;
837  ulint* offsets = NULL;
838 
839  do {
840  const rec_t* rec = *recs++;
841 
842  offsets = rec_get_offsets(rec, index, offsets,
843  ULINT_UNDEFINED, &heap);
844  /* Only leaf nodes may contain externally stored columns. */
845  ut_ad(!rec_offs_any_extern(offsets));
846 
847  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
848  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
849  rec_offs_extra_size(offsets));
850 
851  /* Compress the extra bytes. */
852  c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
853  - c_stream->next_in;
854 
855  if (c_stream->avail_in) {
856  err = deflate(c_stream, Z_NO_FLUSH);
857  if (UNIV_UNLIKELY(err != Z_OK)) {
858  break;
859  }
860  }
861  ut_ad(!c_stream->avail_in);
862 
863  /* Compress the data bytes, except node_ptr. */
864  c_stream->next_in = (byte*) rec;
865  c_stream->avail_in = rec_offs_data_size(offsets)
866  - REC_NODE_PTR_SIZE;
867 
868  if (c_stream->avail_in) {
869  err = deflate(c_stream, Z_NO_FLUSH);
870  if (UNIV_UNLIKELY(err != Z_OK)) {
871  break;
872  }
873  }
874 
875  ut_ad(!c_stream->avail_in);
876 
/* Stash the node pointer uncompressed, addressed by heap_no, and
advance the stream past it so deflate() never sees those bytes. */
877  memcpy(storage - REC_NODE_PTR_SIZE
878  * (rec_get_heap_no_new(rec) - 1),
879  c_stream->next_in, REC_NODE_PTR_SIZE);
880  c_stream->next_in += REC_NODE_PTR_SIZE;
881  } while (--n_dense);
882 
883  return(err);
884 }
885 
886 /**********************************************************************/
/* Compress the records of a leaf page of a secondary index.  Each
record's REC_N_NEW_EXTRA_BYTES header is skipped (reconstructed on
decompress); everything else is deflated.
NOTE(review): original line 893 (the FILE_LOGFILE debug parameter) was
lost in extraction. */
889 static
890 int
891 page_zip_compress_sec(
892 /*==================*/
894  z_stream* c_stream,
895  const rec_t** recs,
897  ulint n_dense)
898 {
899  int err = Z_OK;
900 
901  ut_ad(n_dense > 0);
902 
903  do {
904  const rec_t* rec = *recs++;
905 
906  /* Compress everything up to this record. */
907  c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
908  - c_stream->next_in;
909 
910  if (UNIV_LIKELY(c_stream->avail_in)) {
911  UNIV_MEM_ASSERT_RW(c_stream->next_in,
912  c_stream->avail_in);
913  err = deflate(c_stream, Z_NO_FLUSH);
914  if (UNIV_UNLIKELY(err != Z_OK)) {
915  break;
916  }
917  }
918 
919  ut_ad(!c_stream->avail_in);
920  ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
921 
922  /* Skip the REC_N_NEW_EXTRA_BYTES. */
923 
924  c_stream->next_in = (byte*) rec;
925  } while (--n_dense);
926 
927  return(err);
928 }
929 
930 /**********************************************************************/
/* Compress one clustered-index record that has externally stored
(BLOB) columns: trx_id/roll_ptr are copied uncompressed into
`storage`, and each BTR_EXTERN_FIELD_REF is copied uncompressed into
the *externs area (which grows downward), skipping deleted records.
NOTE(review): this listing is missing several original lines lost in
extraction — notably 1033 (`+= BTR_EXTERN_FIELD_REF_SIZE;`), 1037
(the `page_zip_dir_find_low(` call), and 1045
(`-= BTR_EXTERN_FIELD_REF_SIZE;`); confirm against upstream before
relying on this text. */
934 static
935 int
936 page_zip_compress_clust_ext(
937 /*========================*/
939  z_stream* c_stream,
940  const rec_t* rec,
941  const ulint* offsets,
942  ulint trx_id_col,
943  byte* deleted,
945  byte* storage,
946  byte** externs,
948  ulint* n_blobs)
950 {
951  int err;
952  ulint i;
953 
954  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
955  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
956  rec_offs_extra_size(offsets));
957 
958  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
959  ulint len;
960  const byte* src;
961 
962  if (UNIV_UNLIKELY(i == trx_id_col)) {
963  ut_ad(!rec_offs_nth_extern(offsets, i));
964  /* Store trx_id and roll_ptr
965  in uncompressed form. */
966  src = rec_get_nth_field(rec, offsets, i, &len);
967  ut_ad(src + DATA_TRX_ID_LEN
968  == rec_get_nth_field(rec, offsets,
969  i + 1, &len));
970  ut_ad(len == DATA_ROLL_PTR_LEN);
971 
972  /* Compress any preceding bytes. */
973  c_stream->avail_in
974  = src - c_stream->next_in;
975 
976  if (c_stream->avail_in) {
977  err = deflate(c_stream, Z_NO_FLUSH);
978  if (UNIV_UNLIKELY(err != Z_OK)) {
979 
980  return(err);
981  }
982  }
983 
984  ut_ad(!c_stream->avail_in);
985  ut_ad(c_stream->next_in == src);
986 
/* Copy trx_id + roll_ptr to the uncompressed storage area,
addressed by heap number. */
987  memcpy(storage
988  - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
989  * (rec_get_heap_no_new(rec) - 1),
990  c_stream->next_in,
991  DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
992 
993  c_stream->next_in
994  += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
995 
996  /* Skip also roll_ptr */
997  i++;
998  } else if (rec_offs_nth_extern(offsets, i)) {
999  src = rec_get_nth_field(rec, offsets, i, &len);
1001  src += len - BTR_EXTERN_FIELD_REF_SIZE;
1002 
1003  c_stream->avail_in = src
1004  - c_stream->next_in;
1005  if (UNIV_LIKELY(c_stream->avail_in)) {
1006  err = deflate(c_stream, Z_NO_FLUSH);
1007  if (UNIV_UNLIKELY(err != Z_OK)) {
1008 
1009  return(err);
1010  }
1011  }
1012 
1013  ut_ad(!c_stream->avail_in);
1014  ut_ad(c_stream->next_in == src);
1015 
1016  /* Reserve space for the data at
1017  the end of the space reserved for
1018  the compressed data and the page
1019  modification log. */
1020 
1021  if (UNIV_UNLIKELY
1022  (c_stream->avail_out
1023  <= BTR_EXTERN_FIELD_REF_SIZE)) {
1024  /* out of space */
1025  return(Z_BUF_ERROR);
1026  }
1027 
1028  ut_ad(*externs == c_stream->next_out
1029  + c_stream->avail_out
1030  + 1/* end of modif. log */);
1031 
/* NOTE(review): extraction gap — line 1033 (advance next_in past the
field reference) is missing here. */
1032  c_stream->next_in
1034 
1035  /* Skip deleted records. */
/* NOTE(review): extraction gap — line 1037, the call being tested
here (a directory lookup of this record among deleted slots), is
missing. */
1036  if (UNIV_LIKELY_NULL
1038  storage, deleted,
1039  page_offset(rec)))) {
1040  continue;
1041  }
1042 
1043  (*n_blobs)++;
/* NOTE(review): extraction gap — line 1045 (shrink avail_out by
BTR_EXTERN_FIELD_REF_SIZE) is missing. */
1044  c_stream->avail_out
1046  *externs -= BTR_EXTERN_FIELD_REF_SIZE;
1047 
1048  /* Copy the BLOB pointer */
1049  memcpy(*externs, c_stream->next_in
1050  - BTR_EXTERN_FIELD_REF_SIZE,
1051  BTR_EXTERN_FIELD_REF_SIZE);
1052  }
1053  }
1054 
1055  return(Z_OK);
1056 }
1057 
1058 /**********************************************************************/
/* Compress the records of a clustered-index leaf page.  The extra
bytes and data of each record are deflated, except trx_id/roll_ptr
(copied uncompressed into `storage` by heap number) and any BLOB
pointers (handled by page_zip_compress_clust_ext(), stored below
`storage` in the `externs` area).  Returns a zlib status code. */
1061 static
1062 int
1063 page_zip_compress_clust(
1064 /*====================*/
1065  FILE_LOGFILE
1066  z_stream* c_stream,
1067  const rec_t** recs,
1069  ulint n_dense,
1070  dict_index_t* index,
1071  ulint* n_blobs,
1073  ulint trx_id_col,
1074  byte* deleted,
1076  byte* storage,
1077  mem_heap_t* heap)
1078 {
1079  int err = Z_OK;
1080  ulint* offsets = NULL;
1081  /* BTR_EXTERN_FIELD_REF storage */
/* The externs area sits immediately below the trx_id/roll_ptr
storage, which holds n_dense fixed-size entries. */
1082  byte* externs = storage - n_dense
1083  * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1084 
1085  ut_ad(*n_blobs == 0);
1086 
1087  do {
1088  const rec_t* rec = *recs++;
1089 
1090  offsets = rec_get_offsets(rec, index, offsets,
1091  ULINT_UNDEFINED, &heap);
1092  ut_ad(rec_offs_n_fields(offsets)
1093  == dict_index_get_n_fields(index));
1094  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1095  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1096  rec_offs_extra_size(offsets));
1097 
1098  /* Compress the extra bytes. */
1099  c_stream->avail_in = rec - REC_N_NEW_EXTRA_BYTES
1100  - c_stream->next_in;
1101 
1102  if (c_stream->avail_in) {
1103  err = deflate(c_stream, Z_NO_FLUSH);
1104  if (UNIV_UNLIKELY(err != Z_OK)) {
1105 
1106  goto func_exit;
1107  }
1108  }
1109  ut_ad(!c_stream->avail_in);
1110  ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
1111 
1112  /* Compress the data bytes. */
1113 
1114  c_stream->next_in = (byte*) rec;
1115 
1116  /* Check if there are any externally stored columns.
1117  For each externally stored column, store the
1118  BTR_EXTERN_FIELD_REF separately. */
1119  if (rec_offs_any_extern(offsets)) {
1120  ut_ad(dict_index_is_clust(index));
1121 
1122  err = page_zip_compress_clust_ext(
1123  LOGFILE
1124  c_stream, rec, offsets, trx_id_col,
1125  deleted, storage, &externs, n_blobs);
1126 
1127  if (UNIV_UNLIKELY(err != Z_OK)) {
1128 
1129  goto func_exit;
1130  }
1131  } else {
1132  ulint len;
1133  const byte* src;
1134 
1135  /* Store trx_id and roll_ptr in uncompressed form. */
1136  src = rec_get_nth_field(rec, offsets,
1137  trx_id_col, &len);
1138  ut_ad(src + DATA_TRX_ID_LEN
1139  == rec_get_nth_field(rec, offsets,
1140  trx_id_col + 1, &len));
1141  ut_ad(len == DATA_ROLL_PTR_LEN);
1142  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
1143  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
1144  rec_offs_extra_size(offsets));
1145 
1146  /* Compress any preceding bytes. */
1147  c_stream->avail_in = src - c_stream->next_in;
1148 
1149  if (c_stream->avail_in) {
1150  err = deflate(c_stream, Z_NO_FLUSH);
1151  if (UNIV_UNLIKELY(err != Z_OK)) {
1152 
/* Equivalent to `goto func_exit`; func_exit only returns err. */
1153  return(err);
1154  }
1155  }
1156 
1157  ut_ad(!c_stream->avail_in);
1158  ut_ad(c_stream->next_in == src);
1159 
1160  memcpy(storage
1161  - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
1162  * (rec_get_heap_no_new(rec) - 1),
1163  c_stream->next_in,
1164  DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
1165 
1166  c_stream->next_in
1167  += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1168 
1169  /* Skip also roll_ptr */
1170  ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
1171  }
1172 
1173  /* Compress the last bytes of the record. */
1174  c_stream->avail_in = rec + rec_offs_data_size(offsets)
1175  - c_stream->next_in;
1176 
1177  if (c_stream->avail_in) {
1178  err = deflate(c_stream, Z_NO_FLUSH);
1179  if (UNIV_UNLIKELY(err != Z_OK)) {
1180 
1181  goto func_exit;
1182  }
1183  }
1184  ut_ad(!c_stream->avail_in);
1185  } while (--n_dense);
1186 
1187 func_exit:
1188  return(err);
1189 }
1190 
1191 /**********************************************************************/
1195 UNIV_INTERN
1196 ibool
1198 /*==============*/
1199  page_zip_des_t* page_zip,
1201  const page_t* page,
1202  dict_index_t* index,
1203  ulint level,
1204  mtr_t* mtr)
1205 {
1206  z_stream c_stream;
1207  int err;
1208  ulint n_fields;/* number of index fields needed */
1209  byte* fields;
1210  byte* buf;
1211  byte* buf_end;/* end of buf */
1212  ulint n_dense;
1213  ulint slot_size;/* amount of uncompressed bytes per record */
1214  const rec_t** recs;
1215  mem_heap_t* heap;
1216  ulint trx_id_col;
1217  ulint n_blobs = 0;
1218  byte* storage;/* storage of uncompressed columns */
1219 #ifndef UNIV_HOTBACKUP
1220  ullint usec = ut_time_us(NULL);
1221 #endif /* !UNIV_HOTBACKUP */
1222 #ifdef PAGE_ZIP_COMPRESS_DBG
1223  FILE* logfile = NULL;
1224 #endif
1225  /* A local copy of srv_cmp_per_index_enabled to avoid reading that
1226  variable multiple times in this function since it can be changed at
1227  anytime. */
1228  my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled;
1229 
1230  ut_a(page_is_comp(page));
1233  ut_ad(page_zip_simple_validate(page_zip));
1234  ut_ad(dict_table_is_comp(index->table));
1235  ut_ad(!dict_index_is_ibuf(index));
1236 
1237  UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
1238 
1239  /* Check the data that will be omitted. */
1240  ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
1241  infimum_extra, sizeof infimum_extra));
1242  ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
1243  infimum_data, sizeof infimum_data));
1244  ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
1245  /* info_bits == 0, n_owned <= max */
1246  <= PAGE_DIR_SLOT_MAX_N_OWNED);
1247  ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
1248  supremum_extra_data, sizeof supremum_extra_data));
1249 
1250  if (page_is_empty(page)) {
1251  ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
1252  == PAGE_NEW_SUPREMUM);
1253  }
1254 
1255  if (page_is_leaf(page)) {
1256  n_fields = dict_index_get_n_fields(index);
1257  } else {
1258  n_fields = dict_index_get_n_unique_in_tree(index);
1259  }
1260 
1261  /* The dense directory excludes the infimum and supremum records. */
1262  n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
1263 #ifdef PAGE_ZIP_COMPRESS_DBG
1264  if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
1265  fprintf(stderr, "compress %p %p %lu %lu %lu\n",
1266  (void*) page_zip, (void*) page,
1267  (ibool) page_is_leaf(page),
1268  n_fields, n_dense);
1269  }
1270  if (UNIV_UNLIKELY(page_zip_compress_log)) {
1271  /* Create a log file for every compression attempt. */
1272  char logfilename[9];
1273  ut_snprintf(logfilename, sizeof logfilename,
1274  "%08x", page_zip_compress_log++);
1275  logfile = fopen(logfilename, "wb");
1276 
1277  if (logfile) {
1278  /* Write the uncompressed page to the log. */
1279  fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
1280  /* Record the compressed size as zero.
1281  This will be overwritten at successful exit. */
1282  putc(0, logfile);
1283  putc(0, logfile);
1284  putc(0, logfile);
1285  putc(0, logfile);
1286  }
1287  }
1288 #endif /* PAGE_ZIP_COMPRESS_DBG */
1289 #ifndef UNIV_HOTBACKUP
1290  page_zip_stat[page_zip->ssize - 1].compressed++;
1291  if (cmp_per_index_enabled) {
1292  mutex_enter(&page_zip_stat_per_index_mutex);
1293  page_zip_stat_per_index[index->id].compressed++;
1294  mutex_exit(&page_zip_stat_per_index_mutex);
1295  }
1296 #endif /* !UNIV_HOTBACKUP */
1297 
1298  if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
1299  >= page_zip_get_size(page_zip))) {
1300 
1301  goto err_exit;
1302  }
1303 
1304  MONITOR_INC(MONITOR_PAGE_COMPRESS);
1305 
1306  heap = mem_heap_create(page_zip_get_size(page_zip)
1307  + n_fields * (2 + sizeof(ulint))
1308  + REC_OFFS_HEADER_SIZE
1309  + n_dense * ((sizeof *recs)
1310  - PAGE_ZIP_DIR_SLOT_SIZE)
1311  + UNIV_PAGE_SIZE * 4
1312  + (512 << MAX_MEM_LEVEL));
1313 
1314  recs = static_cast<const rec_t**>(
1315  mem_heap_zalloc(heap, n_dense * sizeof *recs));
1316 
1317  fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2));
1318 
1319  buf = static_cast<byte*>(
1320  mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));
1321 
1322  buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
1323 
1324  /* Compress the data payload. */
1325  page_zip_set_alloc(&c_stream, heap);
1326 
1327  err = deflateInit2(&c_stream, level,
1328  Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
1329  MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
1330  ut_a(err == Z_OK);
1331 
1332  c_stream.next_out = buf;
1333  /* Subtract the space reserved for uncompressed data. */
1334  /* Page header and the end marker of the modification log */
1335  c_stream.avail_out = buf_end - buf - 1;
1336  /* Dense page directory and uncompressed columns, if any */
1337  if (page_is_leaf(page)) {
1338  if (dict_index_is_clust(index)) {
1339  trx_id_col = dict_index_get_sys_col_pos(
1340  index, DATA_TRX_ID);
1341  ut_ad(trx_id_col > 0);
1342  ut_ad(trx_id_col != ULINT_UNDEFINED);
1343 
1344  slot_size = PAGE_ZIP_DIR_SLOT_SIZE
1345  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
1346  } else {
1347  /* Signal the absence of trx_id
1348  in page_zip_fields_encode() */
1349  ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
1350  == ULINT_UNDEFINED);
1351  trx_id_col = 0;
1352  slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
1353  }
1354  } else {
1355  slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
1356  trx_id_col = ULINT_UNDEFINED;
1357  }
1358 
1359  if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
1360  + 6/* sizeof(zlib header and footer) */)) {
1361  goto zlib_error;
1362  }
1363 
1364  c_stream.avail_out -= n_dense * slot_size;
1365  c_stream.avail_in = page_zip_fields_encode(n_fields, index,
1366  trx_id_col, fields);
1367  c_stream.next_in = fields;
1368  if (UNIV_LIKELY(!trx_id_col)) {
1369  trx_id_col = ULINT_UNDEFINED;
1370  }
1371 
1372  UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1373  err = deflate(&c_stream, Z_FULL_FLUSH);
1374  if (err != Z_OK) {
1375  goto zlib_error;
1376  }
1377 
1378  ut_ad(!c_stream.avail_in);
1379 
1380  page_zip_dir_encode(page, buf_end, recs);
1381 
1382  c_stream.next_in = (byte*) page + PAGE_ZIP_START;
1383 
1384  storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
1385 
1386  /* Compress the records in heap_no order. */
1387  if (UNIV_UNLIKELY(!n_dense)) {
1388  } else if (!page_is_leaf(page)) {
1389  /* This is a node pointer page. */
1390  err = page_zip_compress_node_ptrs(LOGFILE
1391  &c_stream, recs, n_dense,
1392  index, storage, heap);
1393  if (UNIV_UNLIKELY(err != Z_OK)) {
1394  goto zlib_error;
1395  }
1396  } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
1397  /* This is a leaf page in a secondary index. */
1398  err = page_zip_compress_sec(LOGFILE
1399  &c_stream, recs, n_dense);
1400  if (UNIV_UNLIKELY(err != Z_OK)) {
1401  goto zlib_error;
1402  }
1403  } else {
1404  /* This is a leaf page in a clustered index. */
1405  err = page_zip_compress_clust(LOGFILE
1406  &c_stream, recs, n_dense,
1407  index, &n_blobs, trx_id_col,
1408  buf_end - PAGE_ZIP_DIR_SLOT_SIZE
1409  * page_get_n_recs(page),
1410  storage, heap);
1411  if (UNIV_UNLIKELY(err != Z_OK)) {
1412  goto zlib_error;
1413  }
1414  }
1415 
1416  /* Finish the compression. */
1417  ut_ad(!c_stream.avail_in);
1418  /* Compress any trailing garbage, in case the last record was
1419  allocated from an originally longer space on the free list,
1420  or the data of the last record from page_zip_compress_sec(). */
1421  c_stream.avail_in
1422  = page_header_get_field(page, PAGE_HEAP_TOP)
1423  - (c_stream.next_in - page);
1424  ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
1425 
1426  UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
1427  err = deflate(&c_stream, Z_FINISH);
1428 
1429  if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
1430 zlib_error:
1431  deflateEnd(&c_stream);
1432  mem_heap_free(heap);
1433 err_exit:
1434 #ifdef PAGE_ZIP_COMPRESS_DBG
1435  if (logfile) {
1436  fclose(logfile);
1437  }
1438 #endif /* PAGE_ZIP_COMPRESS_DBG */
1439 #ifndef UNIV_HOTBACKUP
1440  if (page_is_leaf(page)) {
1441  dict_index_zip_failure(index);
1442  }
1443 
1444  ullint time_diff = ut_time_us(NULL) - usec;
1445  page_zip_stat[page_zip->ssize - 1].compressed_usec
1446  += time_diff;
1447  if (cmp_per_index_enabled) {
1448  mutex_enter(&page_zip_stat_per_index_mutex);
1449  page_zip_stat_per_index[index->id].compressed_usec
1450  += time_diff;
1451  mutex_exit(&page_zip_stat_per_index_mutex);
1452  }
1453 #endif /* !UNIV_HOTBACKUP */
1454  return(FALSE);
1455  }
1456 
1457  err = deflateEnd(&c_stream);
1458  ut_a(err == Z_OK);
1459 
1460  ut_ad(buf + c_stream.total_out == c_stream.next_out);
1461  ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
1462 
1463  /* Valgrind believes that zlib does not initialize some bits
1464  in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
1465  UNIV_MEM_VALID(buf, c_stream.total_out);
1466 
1467  /* Zero out the area reserved for the modification log.
1468  Space for the end marker of the modification log is not
1469  included in avail_out. */
1470  memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
1471 
1472 #ifdef UNIV_DEBUG
1473  page_zip->m_start =
1474 #endif /* UNIV_DEBUG */
1475  page_zip->m_end = PAGE_DATA + c_stream.total_out;
1476  page_zip->m_nonempty = FALSE;
1477  page_zip->n_blobs = n_blobs;
1478  /* Copy those header fields that will not be written
1479  in buf_flush_init_for_writing() */
1480  memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
1482  memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
1483  memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
1484  PAGE_DATA - FIL_PAGE_DATA);
1485  /* Copy the rest of the compressed page */
1486  memcpy(page_zip->data + PAGE_DATA, buf,
1487  page_zip_get_size(page_zip) - PAGE_DATA);
1488  mem_heap_free(heap);
1489 #ifdef UNIV_ZIP_DEBUG
1490  ut_a(page_zip_validate(page_zip, page, index));
1491 #endif /* UNIV_ZIP_DEBUG */
1492 
1493  if (mtr) {
1494 #ifndef UNIV_HOTBACKUP
1495  page_zip_compress_write_log(page_zip, page, index, mtr);
1496 #endif /* !UNIV_HOTBACKUP */
1497  }
1498 
1499  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
1500 
1501 #ifdef PAGE_ZIP_COMPRESS_DBG
1502  if (logfile) {
1503  /* Record the compressed size of the block. */
1504  byte sz[4];
1505  mach_write_to_4(sz, c_stream.total_out);
1506  fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
1507  fwrite(sz, 1, sizeof sz, logfile);
1508  fclose(logfile);
1509  }
1510 #endif /* PAGE_ZIP_COMPRESS_DBG */
1511 #ifndef UNIV_HOTBACKUP
1512  ullint time_diff = ut_time_us(NULL) - usec;
1513  page_zip_stat[page_zip->ssize - 1].compressed_ok++;
1514  page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
1515  if (cmp_per_index_enabled) {
1516  mutex_enter(&page_zip_stat_per_index_mutex);
1517  page_zip_stat_per_index[index->id].compressed_ok++;
1518  page_zip_stat_per_index[index->id].compressed_usec += time_diff;
1519  mutex_exit(&page_zip_stat_per_index_mutex);
1520  }
1521 
1522  if (page_is_leaf(page)) {
1523  dict_index_zip_success(index);
1524  }
1525 #endif /* !UNIV_HOTBACKUP */
1526 
1527  return(TRUE);
1528 }
1529 
1530 /**********************************************************************/
1533 UNIV_INLINE
1534 ibool
1536 /*=============*/
1537  const rec_t* rec1,
1538  const rec_t* rec2)
1539 {
1540  return(rec1 > rec2);
1541 }
1542 
1543 /**********************************************************************/
1545 static
1546 void
1547 page_zip_dir_sort(
1548 /*==============*/
1549  rec_t** arr,
1550  rec_t** aux_arr,
1551  ulint low,
1552  ulint high)
1553 {
1554  UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
1556 }
1557 
1558 /**********************************************************************/
1560 static
1561 void
1562 page_zip_fields_free(
1563 /*=================*/
1564  dict_index_t* index)
1565 {
1566  if (index) {
1567  dict_table_t* table = index->table;
1568  os_fast_mutex_free(&index->zip_pad.mutex);
1569  mem_heap_free(index->heap);
1570  mutex_free(&(table->autoinc_mutex));
1571  ut_free(table->name);
1572  mem_heap_free(table->heap);
1573  }
1574 }
1575 
1576 /**********************************************************************/
1579 static
1580 dict_index_t*
1581 page_zip_fields_decode(
1582 /*===================*/
1583  const byte* buf,
1584  const byte* end,
1585  ulint* trx_id_col)
1588 {
1589  const byte* b;
1590  ulint n;
1591  ulint i;
1592  ulint val;
1595 
1596  /* Determine the number of fields. */
1597  for (b = buf, n = 0; b < end; n++) {
1598  if (*b++ & 0x80) {
1599  b++; /* skip the second byte */
1600  }
1601  }
1602 
1603  n--; /* n_nullable or trx_id */
1604 
1605  if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
1606 
1607  page_zip_fail(("page_zip_fields_decode: n = %lu\n",
1608  (ulong) n));
1609  return(NULL);
1610  }
1611 
1612  if (UNIV_UNLIKELY(b > end)) {
1613 
1614  page_zip_fail(("page_zip_fields_decode: %p > %p\n",
1615  (const void*) b, (const void*) end));
1616  return(NULL);
1617  }
1618 
1619  table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
1620  DICT_TF_COMPACT, 0);
1621  index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
1622  DICT_HDR_SPACE, 0, n);
1623  index->table = table;
1624  index->n_uniq = n;
1625  /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1626  index->cached = TRUE;
1627 
1628  /* Initialize the fields. */
1629  for (b = buf, i = 0; i < n; i++) {
1630  ulint mtype;
1631  ulint len;
1632 
1633  val = *b++;
1634 
1635  if (UNIV_UNLIKELY(val & 0x80)) {
1636  /* fixed length > 62 bytes */
1637  val = (val & 0x7f) << 8 | *b++;
1638  len = val >> 1;
1639  mtype = DATA_FIXBINARY;
1640  } else if (UNIV_UNLIKELY(val >= 126)) {
1641  /* variable length with max > 255 bytes */
1642  len = 0x7fff;
1643  mtype = DATA_BINARY;
1644  } else if (val <= 1) {
1645  /* variable length with max <= 255 bytes */
1646  len = 0;
1647  mtype = DATA_BINARY;
1648  } else {
1649  /* fixed length < 62 bytes */
1650  len = val >> 1;
1651  mtype = DATA_FIXBINARY;
1652  }
1653 
1654  dict_mem_table_add_col(table, NULL, NULL, mtype,
1655  val & 1 ? DATA_NOT_NULL : 0, len);
1656  dict_index_add_col(index, table,
1657  dict_table_get_nth_col(table, i), 0);
1658  }
1659 
1660  val = *b++;
1661  if (UNIV_UNLIKELY(val & 0x80)) {
1662  val = (val & 0x7f) << 8 | *b++;
1663  }
1664 
1665  /* Decode the position of the trx_id column. */
1666  if (trx_id_col) {
1667  if (!val) {
1668  val = ULINT_UNDEFINED;
1669  } else if (UNIV_UNLIKELY(val >= n)) {
1670  page_zip_fields_free(index);
1671  index = NULL;
1672  } else {
1673  index->type = DICT_CLUSTERED;
1674  }
1675 
1676  *trx_id_col = val;
1677  } else {
1678  /* Decode the number of nullable fields. */
1679  if (UNIV_UNLIKELY(index->n_nullable > val)) {
1680  page_zip_fields_free(index);
1681  index = NULL;
1682  } else {
1683  index->n_nullable = val;
1684  }
1685  }
1686 
1687  ut_ad(b == end);
1688 
1689  return(index);
1690 }
1691 
1692 /**********************************************************************/
/**********************************************************************//**
Populate the sparse page directory on the uncompressed page trailer from
the dense directory of the compressed page, and collect the dense
directory (sorted by address) into recs[].
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_dir_decode(
/*================*/
	const page_zip_des_t*	page_zip,	/*!< in: dense page directory
						on compressed page */
	page_t*			page,		/*!< in: compact page with
						valid header; out: trailer
						and sparse page directory
						filled in */
	rec_t**			recs,		/*!< out: dense page directory
						sorted by ascending address */
	rec_t**			recs_aux,	/*!< in/out: scratch area for
						sorting */
	ulint			n_dense)	/*!< in: number of entries in
						the dense directory */
{
	ulint	i;
	ulint	n_recs;
	byte*	slot;

	n_recs = page_get_n_recs(page);

	/* The number of user records cannot exceed the dense
	directory size. */
	if (UNIV_UNLIKELY(n_recs > n_dense)) {
		page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
			       (ulong) n_recs, (ulong) n_dense));
		return(FALSE);
	}

	/* Traverse the list of stored records in the sorting order,
	starting from the first user record. */

	slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
	UNIV_PREFETCH_RW(slot);

	/* Zero out the page trailer. */
	memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);

	/* The first sparse slot always points to the infimum. */
	mach_write_to_2(slot, PAGE_NEW_INFIMUM);
	slot -= PAGE_DIR_SLOT_SIZE;
	UNIV_PREFETCH_RW(slot);

	/* Initialize the sparse directory and copy the dense directory. */
	for (i = 0; i < n_recs; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		/* Records flagged "owned" in the dense directory get a
		sparse directory slot of their own. */
		if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
			mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
			slot -= PAGE_DIR_SLOT_SIZE;
			UNIV_PREFETCH_RW(slot);
		}

		/* A record must start after the page header area. */
		if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
				  < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
				       (unsigned) i, (unsigned) n_recs,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
	}

	/* The last sparse slot always points to the supremum, and it
	must be exactly the last slot counted in the page header. */
	mach_write_to_2(slot, PAGE_NEW_SUPREMUM);
	{
		const page_dir_slot_t*	last_slot = page_dir_get_nth_slot(
			page, page_dir_get_n_slots(page) - 1);

		if (UNIV_UNLIKELY(slot != last_slot)) {
			page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
				       (const void*) slot,
				       (const void*) last_slot));
			return(FALSE);
		}
	}

	/* Copy the rest of the dense directory (the free list entries,
	which carry no "owned"/"deleted" flag bits). */
	for (; i < n_dense; i++) {
		ulint	offs = page_zip_dir_get(page_zip, i);

		if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
			page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
				       (unsigned) i, (unsigned) n_dense,
				       (ulong) offs));
			return(FALSE);
		}

		recs[i] = page + offs;
	}

	/* Sort recs[] into ascending address order so that records can
	be processed in heap_no order. */
	if (UNIV_LIKELY(n_dense > 1)) {
		page_zip_dir_sort(recs, recs_aux, 0, n_dense);
	}
	return(TRUE);
}
1788 
1789 /**********************************************************************/
/**********************************************************************//**
Initialize the REC_N_NEW_EXTRA_BYTES of each record (info bits, n_owned,
next-record pointers) on an uncompressed page from the dense directory
of the compressed page, and link the free list.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_set_extra_bytes(
/*=====================*/
	const page_zip_des_t*	page_zip,/*!< in: compressed page */
	page_t*			page,	/*!< in/out: uncompressed page */
	ulint			info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0;
					applied to the first user record */
{
	ulint	n;
	ulint	i;
	ulint	n_owned = 1;
	ulint	offs;
	rec_t*	rec;

	n = page_get_n_recs(page);
	rec = page + PAGE_NEW_INFIMUM;

	/* First pass: the records on the record list, in key order. */
	for (i = 0; i < n; i++) {
		offs = page_zip_dir_get(page_zip, i);

		if (offs & PAGE_ZIP_DIR_SLOT_DEL) {
			info_bits |= REC_INFO_DELETED_FLAG;
		}
		if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
			/* This record owns the block of records counted
			since the previous owner. */
			info_bits |= n_owned;
			n_owned = 1;
		} else {
			n_owned++;
		}
		offs &= PAGE_ZIP_DIR_SLOT_MASK;
		if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
				  + REC_N_NEW_EXTRA_BYTES)) {
			page_zip_fail(("page_zip_set_extra_bytes 1:"
				       " %u %u %lx\n",
				       (unsigned) i, (unsigned) n,
				       (ulong) offs));
			return(FALSE);
		}

		/* Link the previous record to this one, then write the
		info bits gathered for this record. */
		rec_set_next_offs_new(rec, offs);
		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
		info_bits = 0;
	}

	/* Set the next pointer of the last user record. */
	rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);

	/* Set n_owned of the supremum record. */
	page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;

	/* The dense directory excludes the infimum and supremum records. */
	n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;

	if (i >= n) {
		/* No deleted records: the dense directory size must
		match the record count exactly. */
		if (UNIV_LIKELY(i == n)) {
			return(TRUE);
		}

		page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
			       (unsigned) i, (unsigned) n));
		return(FALSE);
	}

	offs = page_zip_dir_get(page_zip, i);

	/* Set the extra bytes of deleted records on the free list. */
	for (;;) {
		/* Free-list entries must be plain in-page offsets with
		no flag bits set. */
		if (UNIV_UNLIKELY(!offs)
		    || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {

			page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
				       (ulong) offs));
			return(FALSE);
		}

		rec = page + offs;
		rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */

		if (++i == n) {
			break;
		}

		offs = page_zip_dir_get(page_zip, i);
		rec_set_next_offs_new(rec, offs);
	}

	/* Terminate the free list. */
	rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
	rec_set_next_offs_new(rec, 0);

	return(TRUE);
}
1885 
1886 /**********************************************************************/
1890 static
1891 const byte*
1892 page_zip_apply_log_ext(
1893 /*===================*/
1894  rec_t* rec,
1895  const ulint* offsets,
1896  ulint trx_id_col,
1897  const byte* data,
1898  const byte* end)
1899 {
1900  ulint i;
1901  ulint len;
1902  byte* next_out = rec;
1903 
1904  /* Check if there are any externally stored columns.
1905  For each externally stored column, skip the
1906  BTR_EXTERN_FIELD_REF. */
1907 
1908  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
1909  byte* dst;
1910 
1911  if (UNIV_UNLIKELY(i == trx_id_col)) {
1912  /* Skip trx_id and roll_ptr */
1913  dst = rec_get_nth_field(rec, offsets,
1914  i, &len);
1915  if (UNIV_UNLIKELY(dst - next_out >= end - data)
1916  || UNIV_UNLIKELY
1917  (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
1918  || rec_offs_nth_extern(offsets, i)) {
1919  page_zip_fail(("page_zip_apply_log_ext:"
1920  " trx_id len %lu,"
1921  " %p - %p >= %p - %p\n",
1922  (ulong) len,
1923  (const void*) dst,
1924  (const void*) next_out,
1925  (const void*) end,
1926  (const void*) data));
1927  return(NULL);
1928  }
1929 
1930  memcpy(next_out, data, dst - next_out);
1931  data += dst - next_out;
1932  next_out = dst + (DATA_TRX_ID_LEN
1933  + DATA_ROLL_PTR_LEN);
1934  } else if (rec_offs_nth_extern(offsets, i)) {
1935  dst = rec_get_nth_field(rec, offsets,
1936  i, &len);
1937  ut_ad(len
1938  >= BTR_EXTERN_FIELD_REF_SIZE);
1939 
1940  len += dst - next_out
1942 
1943  if (UNIV_UNLIKELY(data + len >= end)) {
1944  page_zip_fail(("page_zip_apply_log_ext: "
1945  "ext %p+%lu >= %p\n",
1946  (const void*) data,
1947  (ulong) len,
1948  (const void*) end));
1949  return(NULL);
1950  }
1951 
1952  memcpy(next_out, data, len);
1953  data += len;
1954  next_out += len
1956  }
1957  }
1958 
1959  /* Copy the last bytes of the record. */
1960  len = rec_get_end(rec, offsets) - next_out;
1961  if (UNIV_UNLIKELY(data + len >= end)) {
1962  page_zip_fail(("page_zip_apply_log_ext: "
1963  "last %p+%lu >= %p\n",
1964  (const void*) data,
1965  (ulong) len,
1966  (const void*) end));
1967  return(NULL);
1968  }
1969  memcpy(next_out, data, len);
1970  data += len;
1971 
1972  return(data);
1973 }
1974 
1975 /**********************************************************************/
/**********************************************************************//**
Apply the modification log to an uncompressed page.
Do not copy the fields that are stored separately.
@return pointer to end of modification log, or NULL on failure */
static
const byte*
page_zip_apply_log(
/*===============*/
	const byte*	data,	/*!< in: modification log */
	ulint		size,	/*!< in: maximum length of the log, in bytes */
	rec_t**		recs,	/*!< in: dense page directory,
				sorted by address */
	ulint		n_dense,/*!< in: size of recs[] */
	ulint		trx_id_col,/*!< in: column number of trx_id in the
				index, or ULINT_UNDEFINED if none */
	ulint		heap_status,
				/*!< in: heap_no and status bits for
				the next record to uncompress */
	dict_index_t*	index,	/*!< in: index of the page */
	ulint*		offsets)/*!< in/out: work area for
				rec_get_offsets_reverse() */
{
	const byte* const end = data + size;

	for (;;) {
		ulint	val;
		rec_t*	rec;
		ulint	len;
		ulint	hs;

		/* Read the heap_no (shifted left by one; the low bit
		flags a "clear record" entry).  A zero byte terminates
		the log. */
		val = *data++;
		if (UNIV_UNLIKELY(!val)) {
			return(data - 1);
		}
		if (val & 0x80) {
			/* Two-byte encoding for larger heap numbers. */
			val = (val & 0x7f) << 8 | *data++;
			if (UNIV_UNLIKELY(!val)) {
				page_zip_fail(("page_zip_apply_log:"
					       " invalid val %x%x\n",
					       data[-2], data[-1]));
				return(NULL);
			}
		}
		if (UNIV_UNLIKELY(data >= end)) {
			page_zip_fail(("page_zip_apply_log: %p >= %p\n",
				       (const void*) data,
				       (const void*) end));
			return(NULL);
		}
		if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
			page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
				       (ulong) val, (ulong) n_dense));
			return(NULL);
		}

		/* Determine the heap number and status bits of the record. */
		rec = recs[(val >> 1) - 1];

		hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
		hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);

		/* This may either be an old record that is being
		overwritten (updated in place, or allocated from
		the free list), or a new record, with the next
		available_heap_no. */
		if (UNIV_UNLIKELY(hs > heap_status)) {
			page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
				       (ulong) hs, (ulong) heap_status));
			return(NULL);
		} else if (hs == heap_status) {
			/* A new record was allocated from the heap. */
			if (UNIV_UNLIKELY(val & 1)) {
				/* Only existing records may be cleared. */
				page_zip_fail(("page_zip_apply_log:"
					       " attempting to create"
					       " deleted rec %lu\n",
					       (ulong) hs));
				return(NULL);
			}
			heap_status += 1 << REC_HEAP_NO_SHIFT;
		}

		mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);

		if (val & 1) {
			/* Clear the data bytes of the record. */
			mem_heap_t*	heap	= NULL;
			ulint*		offs;
			offs = rec_get_offsets(rec, index, offsets,
					       ULINT_UNDEFINED, &heap);
			memset(rec, 0, rec_offs_data_size(offs));

			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			continue;
		}

#if REC_STATUS_NODE_PTR != TRUE
# error "REC_STATUS_NODE_PTR != TRUE"
#endif
		/* The log stores the record extra bytes in reverse;
		recover the offsets from them before copying. */
		rec_get_offsets_reverse(data, index,
					hs & REC_STATUS_NODE_PTR,
					offsets);
		rec_offs_make_valid(rec, index, offsets);

		/* Copy the extra bytes (backwards). */
		{
			byte*	start	= rec_get_start(rec, offsets);
			byte*	b	= rec - REC_N_NEW_EXTRA_BYTES;
			while (b != start) {
				*--b = *data++;
			}
		}

		/* Copy the data bytes. */
		if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
			/* Non-leaf nodes should not contain any
			externally stored columns. */
			if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
				page_zip_fail(("page_zip_apply_log: "
					       "%lu&REC_STATUS_NODE_PTR\n",
					       (ulong) hs));
				return(NULL);
			}

			data = page_zip_apply_log_ext(
				rec, offsets, trx_id_col, data, end);

			if (UNIV_UNLIKELY(!data)) {
				return(NULL);
			}
		} else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
			len = rec_offs_data_size(offsets)
				- REC_NODE_PTR_SIZE;
			/* Copy the data bytes, except node_ptr. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log: "
					       "node_ptr %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(rec, data, len);
			data += len;
		} else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
			len = rec_offs_data_size(offsets);

			/* Copy all data bytes of
			a record in a secondary index. */
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log: "
					       "sec %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}

			memcpy(rec, data, len);
			data += len;
		} else {
			/* Skip DB_TRX_ID and DB_ROLL_PTR. */
			ulint	l = rec_get_nth_field_offs(offsets,
							   trx_id_col, &len);
			byte*	b;

			if (UNIV_UNLIKELY(data + l >= end)
			    || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
						    + DATA_ROLL_PTR_LEN))) {
				page_zip_fail(("page_zip_apply_log: "
					       "trx_id %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) l,
					       (const void*) end));
				return(NULL);
			}

			/* Copy any preceding data bytes. */
			memcpy(rec, data, l);
			data += l;

			/* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
			b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
			len = rec_get_end(rec, offsets) - b;
			if (UNIV_UNLIKELY(data + len >= end)) {
				page_zip_fail(("page_zip_apply_log: "
					       "clust %p+%lu >= %p\n",
					       (const void*) data,
					       (ulong) len,
					       (const void*) end));
				return(NULL);
			}
			memcpy(b, data, len);
			data += len;
		}
	}
}
2175 
2176 /**********************************************************************/
2180 static
2181 ibool
2182 page_zip_decompress_heap_no(
2183 /*========================*/
2184  z_stream* d_stream,
2185  rec_t* rec,
2186  ulint& heap_status)
2187 {
2188  if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) {
2189  /* n_dense has grown since the page was last compressed. */
2190  return(FALSE);
2191  }
2192 
2193  /* Skip the REC_N_NEW_EXTRA_BYTES. */
2194  d_stream->next_out = rec;
2195 
2196  /* Set heap_no and the status bits. */
2197  mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
2198  heap_status += 1 << REC_HEAP_NO_SHIFT;
2199  return(TRUE);
2200 }
2201 
2202 /**********************************************************************/
/**********************************************************************//**
Decompress the records of a node pointer page.
@return TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_node_ptrs(
/*==========================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	ulint		heap_status = REC_STATUS_NODE_PTR
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint		slot;
	const byte*	storage;

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= n_dense
		* (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE);

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		/* Inflate up to the extra bytes of this record. */
		d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
			- d_stream->next_out;

		ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
		      - PAGE_ZIP_START - PAGE_DIR);
		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);

		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));

		/* Decompress the data bytes, except node_ptr. */
		d_stream->avail_out = rec_offs_data_size(offsets)
			- REC_NODE_PTR_SIZE;

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_node_ptrs:"
				       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		/* Clear the node pointer in case the record
		will be deleted and the space will be reallocated
		to a smaller record. */
		memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
		d_stream->next_out += REC_NODE_PTR_SIZE;

		ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = page_header_get_field(page_zip->data,
						    PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out);
	if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       page_dir_get_nth_slot(page,
					     page_dir_get_n_slots(page) - 1)
		       - d_stream->next_out);
	}

#ifdef UNIV_DEBUG
	page_zip->m_start = PAGE_DATA + d_stream->total_in;
#endif /* UNIV_DEBUG */

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = mod_log_ptr - page_zip->data;
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The compressed data and the trailer (dense directory plus
	uncompressed columns) must not overlap. */
	if (UNIV_UNLIKELY
	    (page_zip_get_trailer_len(page_zip,
				      dict_index_is_clust(index))
	     + page_zip->m_end >= page_zip_get_size(page_zip))) {
		page_zip_fail(("page_zip_decompress_node_ptrs:"
			       " %lu + %lu >= %lu, %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, dict_index_is_clust(index)),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip),
			       (ulong) dict_index_is_clust(index)));
		return(FALSE);
	}

	/* Restore the uncompressed columns in heap_no order. */
	storage = page_zip_dir_start_low(page_zip, n_dense);

	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec = recs[slot];

		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);
		/* Non-leaf nodes should not have any externally
		stored columns. */
		ut_ad(!rec_offs_any_extern(offsets));
		storage -= REC_NODE_PTR_SIZE;

		memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
		       storage, REC_NODE_PTR_SIZE);
	}

	return(TRUE);
}
2390 
/**********************************************************************//**
Decompress the records of a leaf node of a secondary index.
@return	TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_sec(
/*====================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint*		offsets)	/*!< in/out: temporary offsets */
{
	/* heap_no of the first user record, pre-shifted into the
	record header format expected by page_zip_decompress_heap_no(). */
	ulint	heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	ulint	slot;

	ut_a(!dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;

	/* Decompress the records in heap_no order, stopping the
	stream at each record boundary so that the record header
	(heap_no) can be filled in. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec	= recs[slot];

		/* Decompress everything up to this record. */
		d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
			- d_stream->next_out;

		if (UNIV_LIKELY(d_stream->avail_out)) {
			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
				/* The compressed stream ended early;
				still assign this record's heap_no
				before finishing up. */
				page_zip_decompress_heap_no(
					d_stream, rec, heap_status);
				goto zlib_done;
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_sec:"
					       " inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				goto zlib_error;
			}
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}
	}

	/* Decompress the data of the last record and any trailing garbage,
	in case the last record was allocated from an originally longer space
	on the free list. */
	d_stream->avail_out = page_header_get_field(page_zip->data,
						    PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out);
	if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_sec:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_sec:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       page_dir_get_nth_slot(page,
					     page_dir_get_n_slots(page) - 1)
		       - d_stream->next_out);
	}

#ifdef UNIV_DEBUG
	page_zip->m_start = PAGE_DATA + d_stream->total_in;
#endif /* UNIV_DEBUG */

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense,
						 ULINT_UNDEFINED, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = mod_log_ptr - page_zip->data;
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The modification log must not overlap the trailer
	(dense directory) of the compressed page. */
	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, FALSE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	/* There are no uncompressed columns on leaf pages of
	secondary indexes. */

	return(TRUE);
}
2526 
2527 /**********************************************************************/
2531 static
2532 ibool
2533 page_zip_decompress_clust_ext(
2534 /*==========================*/
2535  z_stream* d_stream,
2536  rec_t* rec,
2537  const ulint* offsets,
2538  ulint trx_id_col)
2539 {
2540  ulint i;
2541 
2542  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
2543  ulint len;
2544  byte* dst;
2545 
2546  if (UNIV_UNLIKELY(i == trx_id_col)) {
2547  /* Skip trx_id and roll_ptr */
2548  dst = rec_get_nth_field(rec, offsets, i, &len);
2549  if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
2550  + DATA_ROLL_PTR_LEN)) {
2551 
2552  page_zip_fail(("page_zip_decompress_clust_ext:"
2553  " len[%lu] = %lu\n",
2554  (ulong) i, (ulong) len));
2555  return(FALSE);
2556  }
2557 
2558  if (rec_offs_nth_extern(offsets, i)) {
2559 
2560  page_zip_fail(("page_zip_decompress_clust_ext:"
2561  " DB_TRX_ID at %lu is ext\n",
2562  (ulong) i));
2563  return(FALSE);
2564  }
2565 
2566  d_stream->avail_out = dst - d_stream->next_out;
2567 
2568  switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2569  case Z_STREAM_END:
2570  case Z_OK:
2571  case Z_BUF_ERROR:
2572  if (!d_stream->avail_out) {
2573  break;
2574  }
2575  /* fall through */
2576  default:
2577  page_zip_fail(("page_zip_decompress_clust_ext:"
2578  " 1 inflate(Z_SYNC_FLUSH)=%s\n",
2579  d_stream->msg));
2580  return(FALSE);
2581  }
2582 
2583  ut_ad(d_stream->next_out == dst);
2584 
2585  /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
2586  avoid uninitialized bytes in case the record
2587  is affected by page_zip_apply_log(). */
2588  memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
2589 
2590  d_stream->next_out += DATA_TRX_ID_LEN
2591  + DATA_ROLL_PTR_LEN;
2592  } else if (rec_offs_nth_extern(offsets, i)) {
2593  dst = rec_get_nth_field(rec, offsets, i, &len);
2594  ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
2595  dst += len - BTR_EXTERN_FIELD_REF_SIZE;
2596 
2597  d_stream->avail_out = dst - d_stream->next_out;
2598  switch (inflate(d_stream, Z_SYNC_FLUSH)) {
2599  case Z_STREAM_END:
2600  case Z_OK:
2601  case Z_BUF_ERROR:
2602  if (!d_stream->avail_out) {
2603  break;
2604  }
2605  /* fall through */
2606  default:
2607  page_zip_fail(("page_zip_decompress_clust_ext:"
2608  " 2 inflate(Z_SYNC_FLUSH)=%s\n",
2609  d_stream->msg));
2610  return(FALSE);
2611  }
2612 
2613  ut_ad(d_stream->next_out == dst);
2614 
2615  /* Clear the BLOB pointer in case
2616  the record will be deleted and the
2617  space will not be reused. Note that
2618  the final initialization of the BLOB
2619  pointers (copying from "externs"
2620  or clearing) will have to take place
2621  only after the page modification log
2622  has been applied. Otherwise, we
2623  could end up with an uninitialized
2624  BLOB pointer when a record is deleted,
2625  reallocated and deleted. */
2626  memset(d_stream->next_out, 0,
2627  BTR_EXTERN_FIELD_REF_SIZE);
2628  d_stream->next_out
2630  }
2631  }
2632 
2633  return(TRUE);
2634 }
2635 
/**********************************************************************//**
Compress the records of a leaf node of a clustered index.
Decompresses the record data, then restores DB_TRX_ID, DB_ROLL_PTR and
the BLOB pointers from the uncompressed trailer of the page.
@return	TRUE on success, FALSE on failure */
static
ibool
page_zip_decompress_clust(
/*======================*/
	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
	z_stream*	d_stream,	/*!< in/out: compressed page stream */
	rec_t**		recs,		/*!< in: dense page directory
					sorted by address */
	ulint		n_dense,	/*!< in: size of recs[] */
	dict_index_t*	index,		/*!< in: the index of the page */
	ulint		trx_id_col,	/*!< in: position of DB_TRX_ID */
	ulint*		offsets,	/*!< in/out: temporary offsets */
	mem_heap_t*	heap)		/*!< in: temporary memory heap */
{
	int		err;
	ulint		slot;
	/* heap_no of the first user record, pre-shifted into the
	record header format. */
	ulint		heap_status	= REC_STATUS_ORDINARY
		| PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
	const byte*	storage;
	const byte*	externs;

	ut_a(dict_index_is_clust(index));

	/* Subtract the space reserved for uncompressed data. */
	d_stream->avail_in -= n_dense * (PAGE_ZIP_DIR_SLOT_SIZE
					 + DATA_TRX_ID_LEN
					 + DATA_ROLL_PTR_LEN);

	/* Decompress the records in heap_no order. */
	for (slot = 0; slot < n_dense; slot++) {
		rec_t*	rec	= recs[slot];

		/* Decompress everything up to this record's header. */
		d_stream->avail_out = rec - REC_N_NEW_EXTRA_BYTES
			- d_stream->next_out;

		ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
		      - PAGE_ZIP_START - PAGE_DIR);
		err = inflate(d_stream, Z_SYNC_FLUSH);
		switch (err) {
		case Z_STREAM_END:
			/* Stream ended early; still assign this
			record's heap_no before finishing up. */
			page_zip_decompress_heap_no(
				d_stream, rec, heap_status);
			goto zlib_done;
		case Z_OK:
		case Z_BUF_ERROR:
			if (UNIV_LIKELY(!d_stream->avail_out)) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_clust:"
				       " 1 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}

		if (!page_zip_decompress_heap_no(
			    d_stream, rec, heap_status)) {
			ut_ad(0);
		}

		/* Read the offsets. The status bits are needed here. */
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);

		/* This is a leaf page in a clustered index. */

		/* Check if there are any externally stored columns.
		For each externally stored column, restore the
		BTR_EXTERN_FIELD_REF separately. */

		if (rec_offs_any_extern(offsets)) {
			if (UNIV_UNLIKELY
			    (!page_zip_decompress_clust_ext(
				    d_stream, rec, offsets, trx_id_col))) {

				goto zlib_error;
			}
		} else {
			/* Skip trx_id and roll_ptr */
			ulint	len;
			byte*	dst = rec_get_nth_field(rec, offsets,
							trx_id_col, &len);
			if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
					  + DATA_ROLL_PTR_LEN)) {

				page_zip_fail(("page_zip_decompress_clust:"
					       " len = %lu\n", (ulong) len));
				goto zlib_error;
			}

			d_stream->avail_out = dst - d_stream->next_out;

			switch (inflate(d_stream, Z_SYNC_FLUSH)) {
			case Z_STREAM_END:
			case Z_OK:
			case Z_BUF_ERROR:
				if (!d_stream->avail_out) {
					break;
				}
				/* fall through */
			default:
				page_zip_fail(("page_zip_decompress_clust:"
					       " 2 inflate(Z_SYNC_FLUSH)=%s\n",
					       d_stream->msg));
				goto zlib_error;
			}

			ut_ad(d_stream->next_out == dst);

			/* Clear DB_TRX_ID and DB_ROLL_PTR in order to
			avoid uninitialized bytes in case the record
			is affected by page_zip_apply_log(). */
			memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

			d_stream->next_out += DATA_TRX_ID_LEN
				+ DATA_ROLL_PTR_LEN;
		}

		/* Decompress the last bytes of the record. */
		d_stream->avail_out = rec_get_end(rec, offsets)
			- d_stream->next_out;

		switch (inflate(d_stream, Z_SYNC_FLUSH)) {
		case Z_STREAM_END:
		case Z_OK:
		case Z_BUF_ERROR:
			if (!d_stream->avail_out) {
				break;
			}
			/* fall through */
		default:
			page_zip_fail(("page_zip_decompress_clust:"
				       " 3 inflate(Z_SYNC_FLUSH)=%s\n",
				       d_stream->msg));
			goto zlib_error;
		}
	}

	/* Decompress any trailing garbage, in case the last record was
	allocated from an originally longer space on the free list. */
	d_stream->avail_out = page_header_get_field(page_zip->data,
						    PAGE_HEAP_TOP)
		- page_offset(d_stream->next_out);
	if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
			  - PAGE_ZIP_START - PAGE_DIR)) {

		page_zip_fail(("page_zip_decompress_clust:"
			       " avail_out = %u\n",
			       d_stream->avail_out));
		goto zlib_error;
	}

	if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
		page_zip_fail(("page_zip_decompress_clust:"
			       " inflate(Z_FINISH)=%s\n",
			       d_stream->msg));
zlib_error:
		inflateEnd(d_stream);
		return(FALSE);
	}

	/* Note that d_stream->avail_out > 0 may hold here
	if the modification log is nonempty. */

zlib_done:
	if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
		ut_error;
	}

	{
		page_t*	page = page_align(d_stream->next_out);

		/* Clear the unused heap space on the uncompressed page. */
		memset(d_stream->next_out, 0,
		       page_dir_get_nth_slot(page,
					     page_dir_get_n_slots(page) - 1)
		       - d_stream->next_out);
	}

#ifdef UNIV_DEBUG
	page_zip->m_start = PAGE_DATA + d_stream->total_in;
#endif /* UNIV_DEBUG */

	/* Apply the modification log. */
	{
		const byte*	mod_log_ptr;
		mod_log_ptr = page_zip_apply_log(d_stream->next_in,
						 d_stream->avail_in + 1,
						 recs, n_dense,
						 trx_id_col, heap_status,
						 index, offsets);

		if (UNIV_UNLIKELY(!mod_log_ptr)) {
			return(FALSE);
		}
		page_zip->m_end = mod_log_ptr - page_zip->data;
		page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
	}

	/* The modification log must not overlap the trailer
	(dense directory, trx_id/roll_ptr columns, BLOB pointers). */
	if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE)
			  + page_zip->m_end >= page_zip_get_size(page_zip))) {

		page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
			       (ulong) page_zip_get_trailer_len(
				       page_zip, TRUE),
			       (ulong) page_zip->m_end,
			       (ulong) page_zip_get_size(page_zip)));
		return(FALSE);
	}

	/* storage points to the end of the dense directory; the
	uncompressed trx_id/roll_ptr columns grow downwards from it,
	and the BLOB pointers ("externs") grow downwards below those. */
	storage = page_zip_dir_start_low(page_zip, n_dense);

	externs = storage - n_dense
		* (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

	/* Restore the uncompressed columns in heap_no order. */

	for (slot = 0; slot < n_dense; slot++) {
		ulint	i;
		ulint	len;
		byte*	dst;
		rec_t*	rec	= recs[slot];
		/* A record absent from the free-list map still exists
		on the page; only existing records keep BLOB pointers. */
		ibool	exists	= !page_zip_dir_find_free(
			page_zip, page_offset(rec));
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);

		dst = rec_get_nth_field(rec, offsets,
					trx_id_col, &len);
		ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
		storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
		memcpy(dst, storage,
		       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);

		/* Check if there are any externally stored
		columns in this record.  For each externally
		stored column, restore or clear the
		BTR_EXTERN_FIELD_REF. */
		if (!rec_offs_any_extern(offsets)) {
			continue;
		}

		for (i = 0; i < rec_offs_n_fields(offsets); i++) {
			if (!rec_offs_nth_extern(offsets, i)) {
				continue;
			}
			dst = rec_get_nth_field(rec, offsets, i, &len);

			if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
				page_zip_fail(("page_zip_decompress_clust:"
					       " %lu < 20\n",
					       (ulong) len));
				return(FALSE);
			}

			dst += len - BTR_EXTERN_FIELD_REF_SIZE;

			if (UNIV_LIKELY(exists)) {
				/* Existing record:
				restore the BLOB pointer */
				externs -= BTR_EXTERN_FIELD_REF_SIZE;

				/* The BLOB pointers must not run into
				the modification log. */
				if (UNIV_UNLIKELY
				    (externs < page_zip->data
				     + page_zip->m_end)) {
					page_zip_fail(("page_zip_"
						       "decompress_clust: "
						       "%p < %p + %lu\n",
						       (const void*) externs,
						       (const void*)
						       page_zip->data,
						       (ulong)
						       page_zip->m_end));
					return(FALSE);
				}

				memcpy(dst, externs,
				       BTR_EXTERN_FIELD_REF_SIZE);

				page_zip->n_blobs++;
			} else {
				/* Deleted record:
				clear the BLOB pointer */
				memset(dst, 0,
				       BTR_EXTERN_FIELD_REF_SIZE);
			}
		}
	}

	return(TRUE);
}
2931 
2932 /**********************************************************************/
2937 UNIV_INTERN
2938 ibool
2940 /*================*/
2941  page_zip_des_t* page_zip,
2943  page_t* page,
2944  ibool all)
2948 {
2949  z_stream d_stream;
2950  dict_index_t* index = NULL;
2951  rec_t** recs;
2952  ulint n_dense;/* number of user records on the page */
2953  ulint trx_id_col = ULINT_UNDEFINED;
2954  mem_heap_t* heap;
2955  ulint* offsets;
2956 #ifndef UNIV_HOTBACKUP
2957  ullint usec = ut_time_us(NULL);
2958 #endif /* !UNIV_HOTBACKUP */
2959 
2960  ut_ad(page_zip_simple_validate(page_zip));
2961  UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
2962  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
2963 
2964  /* The dense directory excludes the infimum and supremum records. */
2965  n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
2966  if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
2967  >= page_zip_get_size(page_zip))) {
2968  page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
2969  (ulong) n_dense,
2970  (ulong) page_zip_get_size(page_zip)));
2971  return(FALSE);
2972  }
2973 
2974  heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
2975 
2976  recs = static_cast<rec_t**>(
2977  mem_heap_alloc(heap, n_dense * (2 * sizeof *recs)));
2978 
2979  if (all) {
2980  /* Copy the page header. */
2981  memcpy(page, page_zip->data, PAGE_DATA);
2982  } else {
2983  /* Check that the bytes that we skip are identical. */
2984 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2985  ut_a(!memcmp(FIL_PAGE_TYPE + page,
2986  FIL_PAGE_TYPE + page_zip->data,
2987  PAGE_HEADER - FIL_PAGE_TYPE));
2988  ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
2989  PAGE_HEADER + PAGE_LEVEL + page_zip->data,
2990  PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
2991 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
2992 
2993  /* Copy the mutable parts of the page header. */
2994  memcpy(page, page_zip->data, FIL_PAGE_TYPE);
2995  memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
2996  PAGE_LEVEL - PAGE_N_DIR_SLOTS);
2997 
2998 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
2999  /* Check that the page headers match after copying. */
3000  ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
3001 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
3002  }
3003 
3004 #ifdef UNIV_ZIP_DEBUG
3005  /* Clear the uncompressed page, except the header. */
3006  memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
3007 #endif /* UNIV_ZIP_DEBUG */
3008  UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
3009 
3010  /* Copy the page directory. */
3011  if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
3012  recs + n_dense, n_dense))) {
3013 zlib_error:
3014  mem_heap_free(heap);
3015  return(FALSE);
3016  }
3017 
3018  /* Copy the infimum and supremum records. */
3019  memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
3020  infimum_extra, sizeof infimum_extra);
3021  if (page_is_empty(page)) {
3022  rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
3023  PAGE_NEW_SUPREMUM);
3024  } else {
3025  rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
3026  page_zip_dir_get(page_zip, 0)
3027  & PAGE_ZIP_DIR_SLOT_MASK);
3028  }
3029  memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
3030  memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
3031  supremum_extra_data, sizeof supremum_extra_data);
3032 
3033  page_zip_set_alloc(&d_stream, heap);
3034 
3035  d_stream.next_in = page_zip->data + PAGE_DATA;
3036  /* Subtract the space reserved for
3037  the page header and the end marker of the modification log. */
3038  d_stream.avail_in = page_zip_get_size(page_zip) - (PAGE_DATA + 1);
3039  d_stream.next_out = page + PAGE_ZIP_START;
3040  d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
3041 
3042  if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
3043  != Z_OK)) {
3044  ut_error;
3045  }
3046 
3047  /* Decode the zlib header and the index information. */
3048  if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
3049 
3050  page_zip_fail(("page_zip_decompress:"
3051  " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
3052  goto zlib_error;
3053  }
3054 
3055  if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
3056 
3057  page_zip_fail(("page_zip_decompress:"
3058  " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
3059  goto zlib_error;
3060  }
3061 
3062  index = page_zip_fields_decode(
3063  page + PAGE_ZIP_START, d_stream.next_out,
3064  page_is_leaf(page) ? &trx_id_col : NULL);
3065 
3066  if (UNIV_UNLIKELY(!index)) {
3067 
3068  goto zlib_error;
3069  }
3070 
3071  /* Decompress the user records. */
3072  page_zip->n_blobs = 0;
3073  d_stream.next_out = page + PAGE_ZIP_START;
3074 
3075  {
3076  /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
3077  ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
3078  + dict_index_get_n_fields(index);
3079 
3080  offsets = static_cast<ulint*>(
3081  mem_heap_alloc(heap, n * sizeof(ulint)));
3082 
3083  *offsets = n;
3084  }
3085 
3086  /* Decompress the records in heap_no order. */
3087  if (!page_is_leaf(page)) {
3088  /* This is a node pointer page. */
3089  ulint info_bits;
3090 
3091  if (UNIV_UNLIKELY
3092  (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
3093  recs, n_dense, index,
3094  offsets, heap))) {
3095  goto err_exit;
3096  }
3097 
3098  info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
3099  ? REC_INFO_MIN_REC_FLAG : 0;
3100 
3101  if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
3102  info_bits))) {
3103  goto err_exit;
3104  }
3105  } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
3106  /* This is a leaf page in a secondary index. */
3107  if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
3108  recs, n_dense,
3109  index, offsets))) {
3110  goto err_exit;
3111  }
3112 
3113  if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3114  page, 0))) {
3115 err_exit:
3116  page_zip_fields_free(index);
3117  mem_heap_free(heap);
3118  return(FALSE);
3119  }
3120  } else {
3121  /* This is a leaf page in a clustered index. */
3122  if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
3123  &d_stream, recs,
3124  n_dense, index,
3125  trx_id_col,
3126  offsets, heap))) {
3127  goto err_exit;
3128  }
3129 
3130  if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
3131  page, 0))) {
3132  goto err_exit;
3133  }
3134  }
3135 
3136  ut_a(page_is_comp(page));
3137  UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3138 
3139  page_zip_fields_free(index);
3140  mem_heap_free(heap);
3141 #ifndef UNIV_HOTBACKUP
3142  ullint time_diff = ut_time_us(NULL) - usec;
3143  page_zip_stat[page_zip->ssize - 1].decompressed++;
3144  page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
3145 
3146  index_id_t index_id = btr_page_get_index_id(page);
3147 
3149  mutex_enter(&page_zip_stat_per_index_mutex);
3150  page_zip_stat_per_index[index_id].decompressed++;
3151  page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
3152  mutex_exit(&page_zip_stat_per_index_mutex);
3153  }
3154 #endif /* !UNIV_HOTBACKUP */
3155 
3156  /* Update the stat counter for LRU policy. */
3158 
3159  MONITOR_INC(MONITOR_PAGE_DECOMPRESS);
3160 
3161  return(TRUE);
3162 }
3163 
3164 #ifdef UNIV_ZIP_DEBUG
3165 /**********************************************************************/
3167 static
3168 void
3169 page_zip_hexdump_func(
3170 /*==================*/
3171  const char* name,
3172  const void* buf,
3173  ulint size)
3174 {
3175  const byte* s = static_cast<const byte*>(buf);
3176  ulint addr;
3177  const ulint width = 32; /* bytes per line */
3178 
3179  fprintf(stderr, "%s:\n", name);
3180 
3181  for (addr = 0; addr < size; addr += width) {
3182  ulint i;
3183 
3184  fprintf(stderr, "%04lx ", (ulong) addr);
3185 
3186  i = ut_min(width, size - addr);
3187 
3188  while (i--) {
3189  fprintf(stderr, "%02x", *s++);
3190  }
3191 
3192  putc('\n', stderr);
3193  }
3194 }
3195 
3199 #define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
3200 
3202 UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
3203 
3204 /**********************************************************************/
3207 UNIV_INTERN
3208 ibool
3209 page_zip_validate_low(
3210 /*==================*/
3211  const page_zip_des_t* page_zip,
3212  const page_t* page,
3213  const dict_index_t* index,
3214  ibool sloppy)
3216 {
3217  page_zip_des_t temp_page_zip;
3218  byte* temp_page_buf;
3219  page_t* temp_page;
3220  ibool valid;
3221 
3222  if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3224  || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
3225  || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3226  PAGE_DATA - FIL_PAGE_DATA)) {
3227  page_zip_fail(("page_zip_validate: page header\n"));
3228  page_zip_hexdump(page_zip, sizeof *page_zip);
3229  page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3230  page_zip_hexdump(page, UNIV_PAGE_SIZE);
3231  return(FALSE);
3232  }
3233 
3234  ut_a(page_is_comp(page));
3235 
3236  if (page_zip_validate_header_only) {
3237  return(TRUE);
3238  }
3239 
3240  /* page_zip_decompress() expects the uncompressed page to be
3241  UNIV_PAGE_SIZE aligned. */
3242  temp_page_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
3243  temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE));
3244 
3245 #ifdef UNIV_DEBUG_VALGRIND
3246  /* Get detailed information on the valid bits in case the
3247  UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
3248  page_zip->data[] or page_zip could be viewed at temp_page[] or
3249  temp_page_zip in a debugger when running valgrind --db-attach. */
3250  (void) VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
3251  UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
3252 # if UNIV_WORD_SIZE == 4
3253  VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
3254  /* On 32-bit systems, there is no padding in page_zip_des_t.
3255  On other systems, Valgrind could complain about uninitialized
3256  pad bytes. */
3257  UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
3258 # endif
3259  (void) VALGRIND_GET_VBITS(page_zip->data, temp_page,
3260  page_zip_get_size(page_zip));
3261  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3262 #endif /* UNIV_DEBUG_VALGRIND */
3263 
3264  temp_page_zip = *page_zip;
3265  valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
3266  if (!valid) {
3267  fputs("page_zip_validate(): failed to decompress\n", stderr);
3268  goto func_exit;
3269  }
3270  if (page_zip->n_blobs != temp_page_zip.n_blobs) {
3271  page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
3272  page_zip->n_blobs, temp_page_zip.n_blobs));
3273  valid = FALSE;
3274  }
3275 #ifdef UNIV_DEBUG
3276  if (page_zip->m_start != temp_page_zip.m_start) {
3277  page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
3278  page_zip->m_start, temp_page_zip.m_start));
3279  valid = FALSE;
3280  }
3281 #endif /* UNIV_DEBUG */
3282  if (page_zip->m_end != temp_page_zip.m_end) {
3283  page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
3284  page_zip->m_end, temp_page_zip.m_end));
3285  valid = FALSE;
3286  }
3287  if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
3288  page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
3289  page_zip->m_nonempty,
3290  temp_page_zip.m_nonempty));
3291  valid = FALSE;
3292  }
3293  if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
3294  UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
3295 
3296  /* In crash recovery, the "minimum record" flag may be
3297  set incorrectly until the mini-transaction is
3298  committed. Let us tolerate that difference when we
3299  are performing a sloppy validation. */
3300 
3301  ulint* offsets;
3302  mem_heap_t* heap;
3303  const rec_t* rec;
3304  const rec_t* trec;
3305  byte info_bits_diff;
3306  ulint offset
3307  = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
3308  ut_a(offset >= PAGE_NEW_SUPREMUM);
3309  offset -= 5/*REC_NEW_INFO_BITS*/;
3310 
3311  info_bits_diff = page[offset] ^ temp_page[offset];
3312 
3313  if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
3314  temp_page[offset] = page[offset];
3315 
3316  if (!memcmp(page + PAGE_HEADER,
3317  temp_page + PAGE_HEADER,
3318  UNIV_PAGE_SIZE - PAGE_HEADER
3319  - FIL_PAGE_DATA_END)) {
3320 
3321  /* Only the minimum record flag
3322  differed. Let us ignore it. */
3323  page_zip_fail(("page_zip_validate: "
3324  "min_rec_flag "
3325  "(%s"
3326  "%lu,%lu,0x%02lx)\n",
3327  sloppy ? "ignored, " : "",
3328  page_get_space_id(page),
3329  page_get_page_no(page),
3330  (ulong) page[offset]));
3331  valid = sloppy;
3332  goto func_exit;
3333  }
3334  }
3335 
3336  /* Compare the pointers in the PAGE_FREE list. */
3337  rec = page_header_get_ptr(page, PAGE_FREE);
3338  trec = page_header_get_ptr(temp_page, PAGE_FREE);
3339 
3340  while (rec || trec) {
3341  if (page_offset(rec) != page_offset(trec)) {
3342  page_zip_fail(("page_zip_validate: "
3343  "PAGE_FREE list: %u!=%u\n",
3344  (unsigned) page_offset(rec),
3345  (unsigned) page_offset(trec)));
3346  valid = FALSE;
3347  goto func_exit;
3348  }
3349 
3350  rec = page_rec_get_next_low(rec, TRUE);
3351  trec = page_rec_get_next_low(trec, TRUE);
3352  }
3353 
3354  /* Compare the records. */
3355  heap = NULL;
3356  offsets = NULL;
3357  rec = page_rec_get_next_low(
3358  page + PAGE_NEW_INFIMUM, TRUE);
3359  trec = page_rec_get_next_low(
3360  temp_page + PAGE_NEW_INFIMUM, TRUE);
3361 
3362  do {
3363  if (page_offset(rec) != page_offset(trec)) {
3364  page_zip_fail(("page_zip_validate: "
3365  "record list: 0x%02x!=0x%02x\n",
3366  (unsigned) page_offset(rec),
3367  (unsigned) page_offset(trec)));
3368  valid = FALSE;
3369  break;
3370  }
3371 
3372  if (index) {
3373  /* Compare the data. */
3374  offsets = rec_get_offsets(
3375  rec, index, offsets,
3376  ULINT_UNDEFINED, &heap);
3377 
3378  if (memcmp(rec - rec_offs_extra_size(offsets),
3379  trec - rec_offs_extra_size(offsets),
3380  rec_offs_size(offsets))) {
3381  page_zip_fail(
3382  ("page_zip_validate: "
3383  "record content: 0x%02x",
3384  (unsigned) page_offset(rec)));
3385  valid = FALSE;
3386  break;
3387  }
3388  }
3389 
3390  rec = page_rec_get_next_low(rec, TRUE);
3391  trec = page_rec_get_next_low(trec, TRUE);
3392  } while (rec || trec);
3393 
3394  if (heap) {
3395  mem_heap_free(heap);
3396  }
3397  }
3398 
3399 func_exit:
3400  if (!valid) {
3401  page_zip_hexdump(page_zip, sizeof *page_zip);
3402  page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
3403  page_zip_hexdump(page, UNIV_PAGE_SIZE);
3404  page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
3405  }
3406  ut_free(temp_page_buf);
3407  return(valid);
3408 }
3409 
3410 /**********************************************************************/
3413 UNIV_INTERN
3414 ibool
3415 page_zip_validate(
3416 /*==============*/
3417  const page_zip_des_t* page_zip,
3418  const page_t* page,
3419  const dict_index_t* index)
3420 {
3421  return(page_zip_validate_low(page_zip, page, index,
3422  recv_recovery_is_on()));
3423 }
3424 #endif /* UNIV_ZIP_DEBUG */
3425 
3426 #ifdef UNIV_DEBUG
3427 /**********************************************************************/
3430 static
3431 ibool
3432 page_zip_header_cmp(
3433 /*================*/
3434  const page_zip_des_t* page_zip,
3435  const byte* page)
3436 {
3437  ut_ad(!memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
3439  ut_ad(!memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
3440  2));
3441  ut_ad(!memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
3442  PAGE_DATA - FIL_PAGE_DATA));
3443 
3444  return(TRUE);
3445 }
3446 #endif /* UNIV_DEBUG */
3447 
3448 /**********************************************************************/
3452 static
3453 byte*
3454 page_zip_write_rec_ext(
3455 /*===================*/
3456  page_zip_des_t* page_zip,
3457  const page_t* page,
3458  const byte* rec,
3459  dict_index_t* index,
3460  const ulint* offsets,
3461  ulint create,
3462  ulint trx_id_col,
3463  ulint heap_no,
3464  byte* storage,
3465  byte* data)
3466 {
3467  const byte* start = rec;
3468  ulint i;
3469  ulint len;
3470  byte* externs = storage;
3471  ulint n_ext = rec_offs_n_extern(offsets);
3472 
3473  ut_ad(rec_offs_validate(rec, index, offsets));
3474  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3475  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3476  rec_offs_extra_size(offsets));
3477 
3478  externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3479  * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW);
3480 
3481  /* Note that this will not take into account
3482  the BLOB columns of rec if create==TRUE. */
3483  ut_ad(data + rec_offs_data_size(offsets)
3484  - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3485  - n_ext * BTR_EXTERN_FIELD_REF_SIZE
3486  < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
3487 
3488  {
3489  ulint blob_no = page_zip_get_n_prev_extern(
3490  page_zip, rec, index);
3491  byte* ext_end = externs - page_zip->n_blobs
3493  ut_ad(blob_no <= page_zip->n_blobs);
3494  externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
3495 
3496  if (create) {
3497  page_zip->n_blobs += n_ext;
3498  ASSERT_ZERO_BLOB(ext_end - n_ext
3499  * BTR_EXTERN_FIELD_REF_SIZE);
3500  memmove(ext_end - n_ext
3501  * BTR_EXTERN_FIELD_REF_SIZE,
3502  ext_end,
3503  externs - ext_end);
3504  }
3505 
3506  ut_a(blob_no + n_ext <= page_zip->n_blobs);
3507  }
3508 
3509  for (i = 0; i < rec_offs_n_fields(offsets); i++) {
3510  const byte* src;
3511 
3512  if (UNIV_UNLIKELY(i == trx_id_col)) {
3513  ut_ad(!rec_offs_nth_extern(offsets,
3514  i));
3515  ut_ad(!rec_offs_nth_extern(offsets,
3516  i + 1));
3517  /* Locate trx_id and roll_ptr. */
3518  src = rec_get_nth_field(rec, offsets,
3519  i, &len);
3520  ut_ad(len == DATA_TRX_ID_LEN);
3521  ut_ad(src + DATA_TRX_ID_LEN
3522  == rec_get_nth_field(
3523  rec, offsets,
3524  i + 1, &len));
3525  ut_ad(len == DATA_ROLL_PTR_LEN);
3526 
3527  /* Log the preceding fields. */
3528  ASSERT_ZERO(data, src - start);
3529  memcpy(data, start, src - start);
3530  data += src - start;
3531  start = src + (DATA_TRX_ID_LEN
3532  + DATA_ROLL_PTR_LEN);
3533 
3534  /* Store trx_id and roll_ptr. */
3535  memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3536  * (heap_no - 1),
3537  src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3538  i++; /* skip also roll_ptr */
3539  } else if (rec_offs_nth_extern(offsets, i)) {
3540  src = rec_get_nth_field(rec, offsets,
3541  i, &len);
3542 
3543  ut_ad(dict_index_is_clust(index));
3544  ut_ad(len
3545  >= BTR_EXTERN_FIELD_REF_SIZE);
3546  src += len - BTR_EXTERN_FIELD_REF_SIZE;
3547 
3548  ASSERT_ZERO(data, src - start);
3549  memcpy(data, start, src - start);
3550  data += src - start;
3551  start = src + BTR_EXTERN_FIELD_REF_SIZE;
3552 
3553  /* Store the BLOB pointer. */
3554  externs -= BTR_EXTERN_FIELD_REF_SIZE;
3555  ut_ad(data < externs);
3556  memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
3557  }
3558  }
3559 
3560  /* Log the last bytes of the record. */
3561  len = rec_offs_data_size(offsets) - (start - rec);
3562 
3563  ASSERT_ZERO(data, len);
3564  memcpy(data, start, len);
3565  data += len;
3566 
3567  return(data);
3568 }
3569 
3570 /**********************************************************************/
3573 UNIV_INTERN
3574 void
3576 /*===============*/
3577  page_zip_des_t* page_zip,
3578  const byte* rec,
3579  dict_index_t* index,
3580  const ulint* offsets,
3581  ulint create)
3582 {
3583  const page_t* page;
3584  byte* data;
3585  byte* storage;
3586  ulint heap_no;
3587  byte* slot;
3588 
3589  ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3590  ut_ad(page_zip_simple_validate(page_zip));
3591  ut_ad(page_zip_get_size(page_zip)
3592  > PAGE_DATA + page_zip_dir_size(page_zip));
3593  ut_ad(rec_offs_comp(offsets));
3594  ut_ad(rec_offs_validate(rec, index, offsets));
3595 
3596  ut_ad(page_zip->m_start >= PAGE_DATA);
3597 
3598  page = page_align(rec);
3599 
3600  ut_ad(page_zip_header_cmp(page_zip, page));
3602 
3603  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3604  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3605  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3606  rec_offs_extra_size(offsets));
3607 
3608  slot = page_zip_dir_find(page_zip, page_offset(rec));
3609  ut_a(slot);
3610  /* Copy the delete mark. */
3611  if (rec_get_deleted_flag(rec, TRUE)) {
3612  *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
3613  } else {
3614  *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
3615  }
3616 
3617  ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
3618  ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
3619  - PAGE_DIR - PAGE_DIR_SLOT_SIZE
3620  * page_dir_get_n_slots(page));
3621 
3622  heap_no = rec_get_heap_no_new(rec);
3623  ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
3624  ut_ad(heap_no < page_dir_get_n_heap(page));
3625 
3626  /* Append to the modification log. */
3627  data = page_zip->data + page_zip->m_end;
3628  ut_ad(!*data);
3629 
3630  /* Identify the record by writing its heap number - 1.
3631  0 is reserved to indicate the end of the modification log. */
3632 
3633  if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
3634  *data++ = (byte) (0x80 | (heap_no - 1) >> 7);
3635  ut_ad(!*data);
3636  }
3637  *data++ = (byte) ((heap_no - 1) << 1);
3638  ut_ad(!*data);
3639 
3640  {
3641  const byte* start = rec - rec_offs_extra_size(offsets);
3642  const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
3643 
3644  /* Write the extra bytes backwards, so that
3645  rec_offs_extra_size() can be easily computed in
3646  page_zip_apply_log() by invoking
3647  rec_get_offsets_reverse(). */
3648 
3649  while (b != start) {
3650  *data++ = *--b;
3651  ut_ad(!*data);
3652  }
3653  }
3654 
3655  /* Write the data bytes. Store the uncompressed bytes separately. */
3656  storage = page_zip_dir_start(page_zip);
3657 
3658  if (page_is_leaf(page)) {
3659  ulint len;
3660 
3661  if (dict_index_is_clust(index)) {
3662  ulint trx_id_col;
3663 
3664  trx_id_col = dict_index_get_sys_col_pos(index,
3665  DATA_TRX_ID);
3666  ut_ad(trx_id_col != ULINT_UNDEFINED);
3667 
3668  /* Store separately trx_id, roll_ptr and
3669  the BTR_EXTERN_FIELD_REF of each BLOB column. */
3670  if (rec_offs_any_extern(offsets)) {
3671  data = page_zip_write_rec_ext(
3672  page_zip, page,
3673  rec, index, offsets, create,
3674  trx_id_col, heap_no, storage, data);
3675  } else {
3676  /* Locate trx_id and roll_ptr. */
3677  const byte* src
3678  = rec_get_nth_field(rec, offsets,
3679  trx_id_col, &len);
3680  ut_ad(len == DATA_TRX_ID_LEN);
3681  ut_ad(src + DATA_TRX_ID_LEN
3682  == rec_get_nth_field(
3683  rec, offsets,
3684  trx_id_col + 1, &len));
3685  ut_ad(len == DATA_ROLL_PTR_LEN);
3686 
3687  /* Log the preceding fields. */
3688  ASSERT_ZERO(data, src - rec);
3689  memcpy(data, rec, src - rec);
3690  data += src - rec;
3691 
3692  /* Store trx_id and roll_ptr. */
3693  memcpy(storage
3694  - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
3695  * (heap_no - 1),
3696  src,
3697  DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3698 
3699  src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
3700 
3701  /* Log the last bytes of the record. */
3702  len = rec_offs_data_size(offsets)
3703  - (src - rec);
3704 
3705  ASSERT_ZERO(data, len);
3706  memcpy(data, src, len);
3707  data += len;
3708  }
3709  } else {
3710  /* Leaf page of a secondary index:
3711  no externally stored columns */
3712  ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
3713  == ULINT_UNDEFINED);
3714  ut_ad(!rec_offs_any_extern(offsets));
3715 
3716  /* Log the entire record. */
3717  len = rec_offs_data_size(offsets);
3718 
3719  ASSERT_ZERO(data, len);
3720  memcpy(data, rec, len);
3721  data += len;
3722  }
3723  } else {
3724  /* This is a node pointer page. */
3725  ulint len;
3726 
3727  /* Non-leaf nodes should not have any externally
3728  stored columns. */
3729  ut_ad(!rec_offs_any_extern(offsets));
3730 
3731  /* Copy the data bytes, except node_ptr. */
3732  len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
3733  ut_ad(data + len < storage - REC_NODE_PTR_SIZE
3734  * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
3735  ASSERT_ZERO(data, len);
3736  memcpy(data, rec, len);
3737  data += len;
3738 
3739  /* Copy the node pointer to the uncompressed area. */
3740  memcpy(storage - REC_NODE_PTR_SIZE
3741  * (heap_no - 1),
3742  rec + len,
3743  REC_NODE_PTR_SIZE);
3744  }
3745 
3746  ut_a(!*data);
3747  ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
3748  page_zip->m_end = data - page_zip->data;
3749  page_zip->m_nonempty = TRUE;
3750 
3751 #ifdef UNIV_ZIP_DEBUG
3752  ut_a(page_zip_validate(page_zip, page_align(rec), index));
3753 #endif /* UNIV_ZIP_DEBUG */
3754 }
3755 
3756 /***********************************************************/
3759 UNIV_INTERN
3760 byte*
3762 /*==========================*/
3763  byte* ptr,
3764  byte* end_ptr,
3765  page_t* page,
3766  page_zip_des_t* page_zip)
3767 {
3768  ulint offset;
3769  ulint z_offset;
3770 
3771  ut_ad(!page == !page_zip);
3772 
3773  if (UNIV_UNLIKELY
3774  (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
3775 
3776  return(NULL);
3777  }
3778 
3779  offset = mach_read_from_2(ptr);
3780  z_offset = mach_read_from_2(ptr + 2);
3781 
3782  if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3783  || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3784  || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3785 corrupt:
3786  recv_sys->found_corrupt_log = TRUE;
3787 
3788  return(NULL);
3789  }
3790 
3791  if (page) {
3792  if (UNIV_UNLIKELY(!page_zip)
3793  || UNIV_UNLIKELY(!page_is_leaf(page))) {
3794 
3795  goto corrupt;
3796  }
3797 
3798 #ifdef UNIV_ZIP_DEBUG
3799  ut_a(page_zip_validate(page_zip, page, NULL));
3800 #endif /* UNIV_ZIP_DEBUG */
3801 
3802  memcpy(page + offset,
3803  ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3804  memcpy(page_zip->data + z_offset,
3805  ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
3806 
3807 #ifdef UNIV_ZIP_DEBUG
3808  ut_a(page_zip_validate(page_zip, page, NULL));
3809 #endif /* UNIV_ZIP_DEBUG */
3810  }
3811 
3812  return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
3813 }
3814 
3815 /**********************************************************************/
3818 UNIV_INTERN
3819 void
3821 /*====================*/
3822  page_zip_des_t* page_zip,
3823  const byte* rec,
3825  dict_index_t* index,
3826  const ulint* offsets,
3827  ulint n,
3828  mtr_t* mtr)
3830 {
3831  const byte* field;
3832  byte* externs;
3833  const page_t* page = page_align(rec);
3834  ulint blob_no;
3835  ulint len;
3836 
3837  ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3839  ut_ad(page_zip_simple_validate(page_zip));
3840  ut_ad(page_zip_get_size(page_zip)
3841  > PAGE_DATA + page_zip_dir_size(page_zip));
3842  ut_ad(rec_offs_comp(offsets));
3843  ut_ad(rec_offs_validate(rec, NULL, offsets));
3844  ut_ad(rec_offs_any_extern(offsets));
3845  ut_ad(rec_offs_nth_extern(offsets, n));
3846 
3847  ut_ad(page_zip->m_start >= PAGE_DATA);
3848  ut_ad(page_zip_header_cmp(page_zip, page));
3849 
3850  ut_ad(page_is_leaf(page));
3851  ut_ad(dict_index_is_clust(index));
3852 
3853  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
3854  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
3855  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
3856  rec_offs_extra_size(offsets));
3857 
3858  blob_no = page_zip_get_n_prev_extern(page_zip, rec, index)
3859  + rec_get_n_extern_new(rec, index, n);
3860  ut_a(blob_no < page_zip->n_blobs);
3861 
3862  externs = page_zip->data + page_zip_get_size(page_zip)
3863  - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
3864  * (PAGE_ZIP_DIR_SLOT_SIZE
3865  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
3866 
3867  field = rec_get_nth_field(rec, offsets, n, &len);
3868 
3869  externs -= (blob_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
3870  field += len - BTR_EXTERN_FIELD_REF_SIZE;
3871 
3872  memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
3873 
3874 #ifdef UNIV_ZIP_DEBUG
3875  ut_a(page_zip_validate(page_zip, page, index));
3876 #endif /* UNIV_ZIP_DEBUG */
3877 
3878  if (mtr) {
3879 #ifndef UNIV_HOTBACKUP
3880  byte* log_ptr = mlog_open(
3881  mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
3882  if (UNIV_UNLIKELY(!log_ptr)) {
3883  return;
3884  }
3885 
3887  (byte*) field, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
3888  mach_write_to_2(log_ptr, page_offset(field));
3889  log_ptr += 2;
3890  mach_write_to_2(log_ptr, externs - page_zip->data);
3891  log_ptr += 2;
3892  memcpy(log_ptr, externs, BTR_EXTERN_FIELD_REF_SIZE);
3893  log_ptr += BTR_EXTERN_FIELD_REF_SIZE;
3894  mlog_close(mtr, log_ptr);
3895 #endif /* !UNIV_HOTBACKUP */
3896  }
3897 }
3898 
3899 /***********************************************************/
3902 UNIV_INTERN
3903 byte*
3905 /*==========================*/
3906  byte* ptr,
3907  byte* end_ptr,
3908  page_t* page,
3909  page_zip_des_t* page_zip)
3910 {
3911  ulint offset;
3912  ulint z_offset;
3913 
3914  ut_ad(!page == !page_zip);
3915 
3916  if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
3917 
3918  return(NULL);
3919  }
3920 
3921  offset = mach_read_from_2(ptr);
3922  z_offset = mach_read_from_2(ptr + 2);
3923 
3924  if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
3925  || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
3926  || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
3927 corrupt:
3928  recv_sys->found_corrupt_log = TRUE;
3929 
3930  return(NULL);
3931  }
3932 
3933  if (page) {
3934  byte* storage_end;
3935  byte* field;
3936  byte* storage;
3937  ulint heap_no;
3938 
3939  if (UNIV_UNLIKELY(!page_zip)
3940  || UNIV_UNLIKELY(page_is_leaf(page))) {
3941 
3942  goto corrupt;
3943  }
3944 
3945 #ifdef UNIV_ZIP_DEBUG
3946  ut_a(page_zip_validate(page_zip, page, NULL));
3947 #endif /* UNIV_ZIP_DEBUG */
3948 
3949  field = page + offset;
3950  storage = page_zip->data + z_offset;
3951 
3952  storage_end = page_zip_dir_start(page_zip);
3953 
3954  heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
3955 
3956  if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
3957  || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
3958  || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
3959 
3960  goto corrupt;
3961  }
3962 
3963  memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
3964  memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
3965 
3966 #ifdef UNIV_ZIP_DEBUG
3967  ut_a(page_zip_validate(page_zip, page, NULL));
3968 #endif /* UNIV_ZIP_DEBUG */
3969  }
3970 
3971  return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
3972 }
3973 
3974 /**********************************************************************/
3976 UNIV_INTERN
3977 void
3979 /*====================*/
3980  page_zip_des_t* page_zip,
3981  byte* rec,
3982  ulint size,
3983  ulint ptr,
3984  mtr_t* mtr)
3985 {
3986  byte* field;
3987  byte* storage;
3988 #ifdef UNIV_DEBUG
3989  page_t* page = page_align(rec);
3990 #endif /* UNIV_DEBUG */
3991 
3992  ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
3994  ut_ad(page_zip_simple_validate(page_zip));
3995  ut_ad(page_zip_get_size(page_zip)
3996  > PAGE_DATA + page_zip_dir_size(page_zip));
3997  ut_ad(page_rec_is_comp(rec));
3998 
3999  ut_ad(page_zip->m_start >= PAGE_DATA);
4000  ut_ad(page_zip_header_cmp(page_zip, page));
4001 
4002  ut_ad(!page_is_leaf(page));
4003 
4004  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4005  UNIV_MEM_ASSERT_RW(rec, size);
4006 
4007  storage = page_zip_dir_start(page_zip)
4008  - (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
4009  field = rec + size - REC_NODE_PTR_SIZE;
4010 
4011 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
4012  ut_a(!memcmp(storage, field, REC_NODE_PTR_SIZE));
4013 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
4014 #if REC_NODE_PTR_SIZE != 4
4015 # error "REC_NODE_PTR_SIZE != 4"
4016 #endif
4017  mach_write_to_4(field, ptr);
4018  memcpy(storage, field, REC_NODE_PTR_SIZE);
4019 
4020  if (mtr) {
4021 #ifndef UNIV_HOTBACKUP
4022  byte* log_ptr = mlog_open(mtr,
4023  11 + 2 + 2 + REC_NODE_PTR_SIZE);
4024  if (UNIV_UNLIKELY(!log_ptr)) {
4025  return;
4026  }
4027 
4029  field, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
4030  mach_write_to_2(log_ptr, page_offset(field));
4031  log_ptr += 2;
4032  mach_write_to_2(log_ptr, storage - page_zip->data);
4033  log_ptr += 2;
4034  memcpy(log_ptr, field, REC_NODE_PTR_SIZE);
4035  log_ptr += REC_NODE_PTR_SIZE;
4036  mlog_close(mtr, log_ptr);
4037 #endif /* !UNIV_HOTBACKUP */
4038  }
4039 }
4040 
4041 /**********************************************************************/
4043 UNIV_INTERN
4044 void
4046 /*===============================*/
4047  page_zip_des_t* page_zip,
4048  byte* rec,
4049  const ulint* offsets,
4050  ulint trx_id_col,
4051  trx_id_t trx_id,
4052  roll_ptr_t roll_ptr)
4053 {
4054  byte* field;
4055  byte* storage;
4056 #ifdef UNIV_DEBUG
4057  page_t* page = page_align(rec);
4058 #endif /* UNIV_DEBUG */
4059  ulint len;
4060 
4061  ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
4062 
4064  ut_ad(page_zip_simple_validate(page_zip));
4065  ut_ad(page_zip_get_size(page_zip)
4066  > PAGE_DATA + page_zip_dir_size(page_zip));
4067  ut_ad(rec_offs_validate(rec, NULL, offsets));
4068  ut_ad(rec_offs_comp(offsets));
4069 
4070  ut_ad(page_zip->m_start >= PAGE_DATA);
4071  ut_ad(page_zip_header_cmp(page_zip, page));
4072 
4073  ut_ad(page_is_leaf(page));
4074 
4075  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4076 
4077  storage = page_zip_dir_start(page_zip)
4078  - (rec_get_heap_no_new(rec) - 1)
4079  * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4080 
4081 #if DATA_TRX_ID + 1 != DATA_ROLL_PTR
4082 # error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
4083 #endif
4084  field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
4085  ut_ad(len == DATA_TRX_ID_LEN);
4086  ut_ad(field + DATA_TRX_ID_LEN
4087  == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
4088  ut_ad(len == DATA_ROLL_PTR_LEN);
4089 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
4090  ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
4091 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
4092 #if DATA_TRX_ID_LEN != 6
4093 # error "DATA_TRX_ID_LEN != 6"
4094 #endif
4095  mach_write_to_6(field, trx_id);
4096 #if DATA_ROLL_PTR_LEN != 7
4097 # error "DATA_ROLL_PTR_LEN != 7"
4098 #endif
4099  mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
4100  memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4101 
4102  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4103  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4104  rec_offs_extra_size(offsets));
4105  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4106 }
4107 
4108 /**********************************************************************/
4111 static
4112 void
4113 page_zip_clear_rec(
4114 /*===============*/
4115  page_zip_des_t* page_zip,
4116  byte* rec,
4117  const dict_index_t* index,
4118  const ulint* offsets)
4119 {
4120  ulint heap_no;
4121  page_t* page = page_align(rec);
4122  byte* storage;
4123  byte* field;
4124  ulint len;
4125  /* page_zip_validate() would fail here if a record
4126  containing externally stored columns is being deleted. */
4127  ut_ad(rec_offs_validate(rec, index, offsets));
4128  ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
4129  ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
4130  ut_ad(page_zip_header_cmp(page_zip, page));
4131 
4132  heap_no = rec_get_heap_no_new(rec);
4133  ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
4134 
4135  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4136  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4137  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4138  rec_offs_extra_size(offsets));
4139 
4140  if (!page_is_leaf(page)) {
4141  /* Clear node_ptr. On the compressed page,
4142  there is an array of node_ptr immediately before the
4143  dense page directory, at the very end of the page. */
4144  storage = page_zip_dir_start(page_zip);
4146  rec_offs_n_fields(offsets) - 1);
4147  field = rec_get_nth_field(rec, offsets,
4148  rec_offs_n_fields(offsets) - 1,
4149  &len);
4150  ut_ad(len == REC_NODE_PTR_SIZE);
4151 
4152  ut_ad(!rec_offs_any_extern(offsets));
4153  memset(field, 0, REC_NODE_PTR_SIZE);
4154  memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
4155  0, REC_NODE_PTR_SIZE);
4156  } else if (dict_index_is_clust(index)) {
4157  /* Clear trx_id and roll_ptr. On the compressed page,
4158  there is an array of these fields immediately before the
4159  dense page directory, at the very end of the page. */
4160  const ulint trx_id_pos
4162  dict_table_get_sys_col(
4163  index->table, DATA_TRX_ID), index);
4164  storage = page_zip_dir_start(page_zip);
4165  field = rec_get_nth_field(rec, offsets, trx_id_pos, &len);
4166  ut_ad(len == DATA_TRX_ID_LEN);
4167 
4168  memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4169  memset(storage - (heap_no - 1)
4170  * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4171  0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4172 
4173  if (rec_offs_any_extern(offsets)) {
4174  ulint i;
4175 
4176  for (i = rec_offs_n_fields(offsets); i--; ) {
4177  /* Clear all BLOB pointers in order to make
4178  page_zip_validate() pass. */
4179  if (rec_offs_nth_extern(offsets, i)) {
4180  field = rec_get_nth_field(
4181  rec, offsets, i, &len);
4182  ut_ad(len
4183  == BTR_EXTERN_FIELD_REF_SIZE);
4184  memset(field + len
4185  - BTR_EXTERN_FIELD_REF_SIZE,
4186  0, BTR_EXTERN_FIELD_REF_SIZE);
4187  }
4188  }
4189  }
4190  } else {
4191  ut_ad(!rec_offs_any_extern(offsets));
4192  }
4193 
4194 #ifdef UNIV_ZIP_DEBUG
4195  ut_a(page_zip_validate(page_zip, page, index));
4196 #endif /* UNIV_ZIP_DEBUG */
4197 }
4198 
4199 /**********************************************************************/
4202 UNIV_INTERN
4203 void
4205 /*=====================*/
4206  page_zip_des_t* page_zip,
4207  const byte* rec,
4208  ulint flag)
4209 {
4210  byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4211  ut_a(slot);
4212  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4213  if (flag) {
4214  *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
4215  } else {
4216  *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
4217  }
4218 #ifdef UNIV_ZIP_DEBUG
4219  ut_a(page_zip_validate(page_zip, page_align(rec), NULL));
4220 #endif /* UNIV_ZIP_DEBUG */
4221 }
4222 
4223 /**********************************************************************/
4226 UNIV_INTERN
4227 void
4229 /*===================*/
4230  page_zip_des_t* page_zip,
4231  const byte* rec,
4232  ulint flag)
4233 {
4234  byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
4235  ut_a(slot);
4236  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4237  if (flag) {
4238  *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4239  } else {
4240  *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
4241  }
4242 }
4243 
4244 /**********************************************************************/
4246 UNIV_INTERN
4247 void
4249 /*================*/
4250  page_zip_des_t* page_zip,
4251  const byte* prev_rec,
4252  const byte* free_rec,
4254  byte* rec)
4255 {
4256  ulint n_dense;
4257  byte* slot_rec;
4258  byte* slot_free;
4259 
4260  ut_ad(prev_rec != rec);
4261  ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
4262  ut_ad(page_zip_simple_validate(page_zip));
4263 
4264  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4265 
4266  if (page_rec_is_infimum(prev_rec)) {
4267  /* Use the first slot. */
4268  slot_rec = page_zip->data + page_zip_get_size(page_zip);
4269  } else {
4270  byte* end = page_zip->data + page_zip_get_size(page_zip);
4271  byte* start = end - page_zip_dir_user_size(page_zip);
4272 
4273  if (UNIV_LIKELY(!free_rec)) {
4274  /* PAGE_N_RECS was already incremented
4275  in page_cur_insert_rec_zip(), but the
4276  dense directory slot at that position
4277  contains garbage. Skip it. */
4278  start += PAGE_ZIP_DIR_SLOT_SIZE;
4279  }
4280 
4281  slot_rec = page_zip_dir_find_low(start, end,
4282  page_offset(prev_rec));
4283  ut_a(slot_rec);
4284  }
4285 
4286  /* Read the old n_dense (n_heap may have been incremented). */
4287  n_dense = page_dir_get_n_heap(page_zip->data)
4288  - (PAGE_HEAP_NO_USER_LOW + 1);
4289 
4290  if (UNIV_LIKELY_NULL(free_rec)) {
4291  /* The record was allocated from the free list.
4292  Shift the dense directory only up to that slot.
4293  Note that in this case, n_dense is actually
4294  off by one, because page_cur_insert_rec_zip()
4295  did not increment n_heap. */
4296  ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
4297  + PAGE_HEAP_NO_USER_LOW);
4298  ut_ad(rec >= free_rec);
4299  slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
4300  ut_ad(slot_free);
4301  slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4302  } else {
4303  /* The record was allocated from the heap.
4304  Shift the entire dense directory. */
4305  ut_ad(rec_get_heap_no_new(rec) == n_dense
4306  + PAGE_HEAP_NO_USER_LOW);
4307 
4308  /* Shift to the end of the dense page directory. */
4309  slot_free = page_zip->data + page_zip_get_size(page_zip)
4310  - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4311  }
4312 
4313  /* Shift the dense directory to allocate place for rec. */
4314  memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
4315  slot_rec - slot_free);
4316 
4317  /* Write the entry for the inserted record.
4318  The "owned" and "deleted" flags must be zero. */
4319  mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
4320 }
4321 
4322 /**********************************************************************/
4325 UNIV_INTERN
4326 void
4328 /*================*/
4329  page_zip_des_t* page_zip,
4330  byte* rec,
4331  const dict_index_t* index,
4332  const ulint* offsets,
4333  const byte* free)
4335 {
4336  byte* slot_rec;
4337  byte* slot_free;
4338  ulint n_ext;
4339  page_t* page = page_align(rec);
4340 
4341  ut_ad(rec_offs_validate(rec, index, offsets));
4342  ut_ad(rec_offs_comp(offsets));
4343 
4344  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4345  UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
4346  UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
4347  rec_offs_extra_size(offsets));
4348 
4349  slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
4350 
4351  ut_a(slot_rec);
4352 
4353  /* This could not be done before page_zip_dir_find(). */
4354  page_header_set_field(page, page_zip, PAGE_N_RECS,
4355  (ulint)(page_get_n_recs(page) - 1));
4356 
4357  if (UNIV_UNLIKELY(!free)) {
4358  /* Make the last slot the start of the free list. */
4359  slot_free = page_zip->data + page_zip_get_size(page_zip)
4360  - PAGE_ZIP_DIR_SLOT_SIZE
4361  * (page_dir_get_n_heap(page_zip->data)
4362  - PAGE_HEAP_NO_USER_LOW);
4363  } else {
4364  slot_free = page_zip_dir_find_free(page_zip,
4365  page_offset(free));
4366  ut_a(slot_free < slot_rec);
4367  /* Grow the free list by one slot by moving the start. */
4368  slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
4369  }
4370 
4371  if (UNIV_LIKELY(slot_rec > slot_free)) {
4372  memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
4373  slot_free,
4374  slot_rec - slot_free);
4375  }
4376 
4377  /* Write the entry for the deleted record.
4378  The "owned" and "deleted" flags will be cleared. */
4379  mach_write_to_2(slot_free, page_offset(rec));
4380 
4381  if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
4382  ut_ad(!rec_offs_any_extern(offsets));
4383  goto skip_blobs;
4384  }
4385 
4386  n_ext = rec_offs_n_extern(offsets);
4387  if (UNIV_UNLIKELY(n_ext)) {
4388  /* Shift and zero fill the array of BLOB pointers. */
4389  ulint blob_no;
4390  byte* externs;
4391  byte* ext_end;
4392 
4393  blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
4394  ut_a(blob_no + n_ext <= page_zip->n_blobs);
4395 
4396  externs = page_zip->data + page_zip_get_size(page_zip)
4397  - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
4398  * (PAGE_ZIP_DIR_SLOT_SIZE
4399  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4400 
4401  ext_end = externs - page_zip->n_blobs
4403  externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
4404 
4405  page_zip->n_blobs -= n_ext;
4406  /* Shift and zero fill the array. */
4407  memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
4408  (page_zip->n_blobs - blob_no)
4409  * BTR_EXTERN_FIELD_REF_SIZE);
4410  memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
4411  }
4412 
4413 skip_blobs:
4414  /* The compression algorithm expects info_bits and n_owned
4415  to be 0 for deleted records. */
4416  rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
4417 
4418  page_zip_clear_rec(page_zip, rec, index, offsets);
4419 }
4420 
4421 /**********************************************************************/
4423 UNIV_INTERN
4424 void
4426 /*==================*/
4427  page_zip_des_t* page_zip,
4428  ulint is_clustered)
4430 {
4431  ulint n_dense;
4432  byte* dir;
4433  byte* stored;
4434 
4435  ut_ad(page_is_comp(page_zip->data));
4436  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4437 
4438  /* Read the old n_dense (n_heap has already been incremented). */
4439  n_dense = page_dir_get_n_heap(page_zip->data)
4440  - (PAGE_HEAP_NO_USER_LOW + 1);
4441 
4442  dir = page_zip->data + page_zip_get_size(page_zip)
4443  - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
4444 
4445  if (!page_is_leaf(page_zip->data)) {
4446  ut_ad(!page_zip->n_blobs);
4447  stored = dir - n_dense * REC_NODE_PTR_SIZE;
4448  } else if (is_clustered) {
4449  /* Move the BLOB pointer array backwards to make space for the
4450  roll_ptr and trx_id columns and the dense directory slot. */
4451  byte* externs;
4452 
4453  stored = dir - n_dense
4454  * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4455  externs = stored
4456  - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4457  ASSERT_ZERO(externs
4458  - (PAGE_ZIP_DIR_SLOT_SIZE
4459  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4460  PAGE_ZIP_DIR_SLOT_SIZE
4461  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
4462  memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
4463  + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
4464  externs, stored - externs);
4465  } else {
4466  stored = dir
4467  - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
4468  ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
4469  PAGE_ZIP_DIR_SLOT_SIZE);
4470  }
4471 
4472  /* Move the uncompressed area backwards to make space
4473  for one directory slot. */
4474  memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
4475 }
4476 
4477 /***********************************************************/
4480 UNIV_INTERN
4481 byte*
4483 /*========================*/
4484  byte* ptr,
4485  byte* end_ptr,
4486  page_t* page,
4487  page_zip_des_t* page_zip)
4488 {
4489  ulint offset;
4490  ulint len;
4491 
4492  ut_ad(ptr && end_ptr);
4493  ut_ad(!page == !page_zip);
4494 
4495  if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
4496 
4497  return(NULL);
4498  }
4499 
4500  offset = (ulint) *ptr++;
4501  len = (ulint) *ptr++;
4502 
4503  if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
4504 corrupt:
4505  recv_sys->found_corrupt_log = TRUE;
4506 
4507  return(NULL);
4508  }
4509 
4510  if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
4511 
4512  return(NULL);
4513  }
4514 
4515  if (page) {
4516  if (UNIV_UNLIKELY(!page_zip)) {
4517 
4518  goto corrupt;
4519  }
4520 #ifdef UNIV_ZIP_DEBUG
4521  ut_a(page_zip_validate(page_zip, page, NULL));
4522 #endif /* UNIV_ZIP_DEBUG */
4523 
4524  memcpy(page + offset, ptr, len);
4525  memcpy(page_zip->data + offset, ptr, len);
4526 
4527 #ifdef UNIV_ZIP_DEBUG
4528  ut_a(page_zip_validate(page_zip, page, NULL));
4529 #endif /* UNIV_ZIP_DEBUG */
4530  }
4531 
4532  return(ptr + len);
4533 }
4534 
4535 #ifndef UNIV_HOTBACKUP
4536 /**********************************************************************/
4538 UNIV_INTERN
4539 void
4541 /*======================*/
4542  const byte* data,
4543  ulint length,
4544  mtr_t* mtr)
4545 {
4546  byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
4547  ulint offset = page_offset(data);
4548 
4549  ut_ad(offset < PAGE_DATA);
4550  ut_ad(offset + length < PAGE_DATA);
4551 #if PAGE_DATA > 255
4552 # error "PAGE_DATA > 255"
4553 #endif
4554  ut_ad(length < 256);
4555 
4556  /* If no logging is requested, we may return now */
4557  if (UNIV_UNLIKELY(!log_ptr)) {
4558 
4559  return;
4560  }
4561 
4563  (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
4564  *log_ptr++ = (byte) offset;
4565  *log_ptr++ = (byte) length;
4566  mlog_close(mtr, log_ptr);
4567 
4568  mlog_catenate_string(mtr, data, length);
4569 }
4570 #endif /* !UNIV_HOTBACKUP */
4571 
4572 /**********************************************************************/
4583 UNIV_INTERN
4584 ibool
4586 /*================*/
4587  buf_block_t* block,
4591  dict_index_t* index,
4592  mtr_t* mtr)
4593 {
4594 #ifndef UNIV_HOTBACKUP
4595  buf_pool_t* buf_pool = buf_pool_from_block(block);
4596 #endif /* !UNIV_HOTBACKUP */
4597  page_zip_des_t* page_zip = buf_block_get_page_zip(block);
4598  page_t* page = buf_block_get_frame(block);
4599  buf_block_t* temp_block;
4600  page_t* temp_page;
4601  ulint log_mode;
4602 
4603  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
4604  ut_ad(page_is_comp(page));
4605  ut_ad(!dict_index_is_ibuf(index));
4606  /* Note that page_zip_validate(page_zip, page, index) may fail here. */
4607  UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
4608  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
4609 
4610  /* Disable logging */
4611  log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
4612 
4613 #ifndef UNIV_HOTBACKUP
4614  temp_block = buf_block_alloc(buf_pool);
4616  block->check_index_page_at_flush = TRUE;
4617 #else /* !UNIV_HOTBACKUP */
4618  ut_ad(block == back_block1);
4619  temp_block = back_block2;
4620 #endif /* !UNIV_HOTBACKUP */
4621  temp_page = temp_block->frame;
4622 
4623  /* Copy the old page to temporary space */
4624  buf_frame_copy(temp_page, page);
4625 
4626  btr_blob_dbg_remove(page, index, "zip_reorg");
4627 
4628  /* Recreate the page: note that global data on page (possible
4629  segment headers, next page-field, etc.) is preserved intact */
4630 
4631  page_create(block, mtr, TRUE);
4632 
4633  /* Copy the records from the temporary space to the recreated page;
4634  do not copy the lock bits yet */
4635 
4636  page_copy_rec_list_end_no_locks(block, temp_block,
4637  page_get_infimum_rec(temp_page),
4638  index, mtr);
4639 
4640  if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
4641  /* Copy max trx id to recreated page */
4642  trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
4643  page_set_max_trx_id(block, NULL, max_trx_id, NULL);
4644  ut_ad(max_trx_id != 0);
4645  }
4646 
4647  /* Restore logging. */
4648  mtr_set_log_mode(mtr, log_mode);
4649 
4650  if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) {
4651 
4652 #ifndef UNIV_HOTBACKUP
4653  buf_block_free(temp_block);
4654 #endif /* !UNIV_HOTBACKUP */
4655  return(FALSE);
4656  }
4657 
4658  lock_move_reorganize_page(block, temp_block);
4659 
4660 #ifndef UNIV_HOTBACKUP
4661  buf_block_free(temp_block);
4662 #endif /* !UNIV_HOTBACKUP */
4663  return(TRUE);
4664 }
4665 
4666 #ifndef UNIV_HOTBACKUP
4667 /**********************************************************************/
4672 UNIV_INTERN
4673 void
4675 /*===============*/
4676  page_zip_des_t* page_zip,
4679  page_t* page,
4680  const page_zip_des_t* src_zip,
4681  const page_t* src,
4682  dict_index_t* index,
4683  mtr_t* mtr)
4684 {
4685  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
4686  ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
4687  ut_ad(!dict_index_is_ibuf(index));
4688 #ifdef UNIV_ZIP_DEBUG
4689  /* The B-tree operations that call this function may set
4690  FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
4691  mismatch. A strict page_zip_validate() will be executed later
4692  during the B-tree operations. */
4693  ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
4694 #endif /* UNIV_ZIP_DEBUG */
4695  ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
4696  if (UNIV_UNLIKELY(src_zip->n_blobs)) {
4697  ut_a(page_is_leaf(src));
4698  ut_a(dict_index_is_clust(index));
4699  }
4700 
4701  /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
4702  indexes. It does not matter on other pages. */
4703  ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
4704  || page_get_max_trx_id(src));
4705 
4706  UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
4707  UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
4708  UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
4709  UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
4710 
4711  /* Copy those B-tree page header fields that are related to
4712  the records stored in the page. Also copy the field
4713  PAGE_MAX_TRX_ID. Skip the rest of the page header and
4714  trailer. On the compressed page, there is no trailer. */
4715 #if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
4716 # error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
4717 #endif
4718  memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
4719  PAGE_HEADER_PRIV_END);
4720  memcpy(PAGE_DATA + page, PAGE_DATA + src,
4721  UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
4722  memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
4723  PAGE_HEADER_PRIV_END);
4724  memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
4725  page_zip_get_size(page_zip) - PAGE_DATA);
4726 
4727  /* Copy all fields of src_zip to page_zip, except the pointer
4728  to the compressed data page. */
4729  {
4730  page_zip_t* data = page_zip->data;
4731  memcpy(page_zip, src_zip, sizeof *page_zip);
4732  page_zip->data = data;
4733  }
4734  ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index))
4735  + page_zip->m_end < page_zip_get_size(page_zip));
4736 
4737  if (!page_is_leaf(src)
4738  && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
4739  && UNIV_LIKELY(mach_read_from_4(page
4740  + FIL_PAGE_PREV) != FIL_NULL)) {
4741  /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
4742  ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
4743  TRUE);
4744  if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
4745  rec_t* rec = page + offs;
4746  ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
4747  & REC_INFO_MIN_REC_FLAG);
4748  rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
4749  }
4750  }
4751 
4752 #ifdef UNIV_ZIP_DEBUG
4753  ut_a(page_zip_validate(page_zip, page, index));
4754 #endif /* UNIV_ZIP_DEBUG */
4755  btr_blob_dbg_add(page, index, "page_zip_copy_recs");
4756 
4757  page_zip_compress_write_log(page_zip, page, index, mtr);
4758 }
4759 #endif /* !UNIV_HOTBACKUP */
4760 
4761 /**********************************************************************/
4764 UNIV_INTERN
4765 byte*
4767 /*====================*/
4768  byte* ptr,
4769  byte* end_ptr,
4770  page_t* page,
4771  page_zip_des_t* page_zip)
4772 {
4773  ulint size;
4774  ulint trailer_size;
4775 
4776  ut_ad(ptr && end_ptr);
4777  ut_ad(!page == !page_zip);
4778 
4779  if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
4780 
4781  return(NULL);
4782  }
4783 
4784  size = mach_read_from_2(ptr);
4785  ptr += 2;
4786  trailer_size = mach_read_from_2(ptr);
4787  ptr += 2;
4788 
4789  if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
4790 
4791  return(NULL);
4792  }
4793 
4794  if (page) {
4795  if (UNIV_UNLIKELY(!page_zip)
4796  || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
4797 corrupt:
4798  recv_sys->found_corrupt_log = TRUE;
4799 
4800  return(NULL);
4801  }
4802 
4803  memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
4804  memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
4805  memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
4806  memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
4807  page_zip_get_size(page_zip) - trailer_size
4808  - (FIL_PAGE_TYPE + size));
4809  memcpy(page_zip->data + page_zip_get_size(page_zip)
4810  - trailer_size, ptr + 8 + size, trailer_size);
4811 
4812  if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
4813  TRUE))) {
4814 
4815  goto corrupt;
4816  }
4817  }
4818 
4819  return(ptr + 8 + size + trailer_size);
4820 }
4821 
4822 /**********************************************************************/
4825 UNIV_INTERN
4826 ulint
4828 /*===================*/
4829  const void* data,
4830  ulint size,
4832 {
4833  uLong adler;
4834  ib_uint32_t crc32;
4835  const Bytef* s = static_cast<const byte*>(data);
4836 
4837  /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
4838  and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
4839 
4840  switch (algo) {
4843 
4845 
4846  crc32 = ut_crc32(s + FIL_PAGE_OFFSET,
4848  ^ ut_crc32(s + FIL_PAGE_TYPE, 2)
4849  ^ ut_crc32(s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4851 
4852  return((ulint) crc32);
4856 
4857  adler = adler32(0L, s + FIL_PAGE_OFFSET,
4859  adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
4860  adler = adler32(adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
4862 
4863  return((ulint) adler);
4866  return(BUF_NO_CHECKSUM_MAGIC);
4867  /* no default so the compiler will emit a warning if new enum
4868  is added and not handled here */
4869  }
4870 
4871  ut_error;
4872  return(0);
4873 }
4874 
4875 /**********************************************************************/
4879 UNIV_INTERN
4880 ibool
4882 /*=====================*/
4883  const void* data,
4884  ulint size)
4885 {
4886  ib_uint32_t stored;
4887  ib_uint32_t calc;
4888  ib_uint32_t crc32 = 0 /* silence bogus warning */;
4889  ib_uint32_t innodb = 0 /* silence bogus warning */;
4890 
4891  stored = mach_read_from_4(
4892  (const unsigned char*) data + FIL_PAGE_SPACE_OR_CHKSUM);
4893 
4894  /* declare empty pages non-corrupted */
4895  if (stored == 0) {
4896  /* make sure that the page is really empty */
4897  ut_d(ulint i; for (i = 0; i < size; i++) {
4898  ut_a(*((const char*) data + i) == 0); });
4899 
4900  return(TRUE);
4901  }
4902 
4903  calc = page_zip_calc_checksum(
4904  data, size, static_cast<srv_checksum_algorithm_t>(
4906 
4907  if (stored == calc) {
4908  return(TRUE);
4909  }
4910 
4915  return(stored == calc);
4917  if (stored == BUF_NO_CHECKSUM_MAGIC) {
4918  return(TRUE);
4919  }
4920  crc32 = calc;
4921  innodb = page_zip_calc_checksum(
4922  data, size, SRV_CHECKSUM_ALGORITHM_INNODB);
4923  break;
4925  if (stored == BUF_NO_CHECKSUM_MAGIC) {
4926  return(TRUE);
4927  }
4928  crc32 = page_zip_calc_checksum(
4929  data, size, SRV_CHECKSUM_ALGORITHM_CRC32);
4930  innodb = calc;
4931  break;
4933  return(TRUE);
4934  /* no default so the compiler will emit a warning if new enum
4935  is added and not handled here */
4936  }
4937 
4938  return(stored == crc32 || stored == innodb);
4939 }