/*
 * Special charset values.  Both are negative; presumably this keeps them
 * distinct from a real charset number, which is used elsewhere in this file
 * as a non-negative index (charsets[charset]) -- TODO confirm against the
 * code that assigns and tests these values, which is not visible here.
 */
13 #define NGX_HTTP_CHARSET_OFF -2
14 #define NGX_HTTP_NO_CHARSET -3
/*
 * NOTE(review): 0x10000 looks like a flag bit kept above the range of real
 * charset numbers; presumably it marks a charset given as a "$variable"
 * (the directive handler checks for a leading '$') -- confirm.
 */
15 #define NGX_HTTP_CHARSET_VAR 0x10000
/*
 * Length, without the terminating NUL, of the longest decimal HTML numeric
 * character reference: "&#1114111;" is U+10FFFF, the largest code point
 * accepted by the UTF-8 decoding path (anything above 0x10ffff is rejected).
 * The literal must not be empty, otherwise the macro evaluates to 0.
 * Presumably used to reserve output space for a character that has to be
 * written as an entity -- confirm against the users of this macro.
 */
#define NGX_HTML_ENTITY_LEN (sizeof("&#1114111;") - 1)
/*
 * Forward declarations of the configuration callbacks that are defined
 * further down in this file with the same parameter lists.  Presumably they
 * are registered in ngx_http_charset_filter_module_ctx -- confirm.
 *
 * create_main_conf / create_loc_conf take the configuration context "cf" and
 * return an untyped pointer (presumably the newly created configuration
 * structure, NULL on failure -- confirm).
 */
119 static void *ngx_http_charset_create_main_conf(
ngx_conf_t *cf);
120 static void *ngx_http_charset_create_loc_conf(
ngx_conf_t *cf);
/*
 * merge_loc_conf receives the "parent" and "child" configurations as untyped
 * pointers and returns a char * status (presumably NGX_CONF_OK or
 * NGX_CONF_ERROR -- confirm).
 */
121 static char *ngx_http_charset_merge_loc_conf(
ngx_conf_t *cf,
122 void *parent,
void *child);
142 ngx_http_set_charset_slot,
150 ngx_http_set_charset_slot,
168 &ngx_http_charset_default_types[0] },
172 ngx_http_charset_map_block,
183 ngx_http_charset_postconfiguration,
185 ngx_http_charset_create_main_conf,
191 ngx_http_charset_create_loc_conf,
192 ngx_http_charset_merge_loc_conf
198 &ngx_http_charset_filter_module_ctx,
199 ngx_http_charset_filter_commands,
225 charset = ngx_http_destination_charset(r, &dst);
228 charset = ngx_http_main_request_charset(r, &dst);
236 return ngx_http_next_header_filter(r);
241 source_charset = ngx_http_source_charset(r, &src);
253 "charset: \"%V\" > \"%V\"", &src, &dst);
256 ngx_http_set_charset(r, &dst);
258 return ngx_http_next_header_filter(r);
264 if (source_charset != charset
270 ngx_http_set_charset(r, &dst);
272 return ngx_http_next_header_filter(r);
278 if (source_charset != charset
279 && (charsets[source_charset].tables == NULL
280 || charsets[source_charset].tables[charset] == NULL))
287 ngx_http_set_charset(r, &dst);
289 if (source_charset != charset) {
290 return ngx_http_charset_ctx(r, charsets, charset, source_charset);
293 return ngx_http_next_header_filter(r);
298 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
301 return ngx_http_next_header_filter(r);
330 charset = ngx_http_get_charset(r, name);
337 "unknown charset \"%V\" to override", name);
363 *name = charsets[charset].
name;
376 return ngx_http_get_charset(r, name);
396 if (main_charset->
len == 0) {
407 charset = ngx_http_get_charset(r, main_charset);
411 *src = *main_charset;
428 return ngx_http_get_charset(r, name);
443 *name = charsets[charset].
name;
456 return ngx_http_get_charset(r, name);
472 for (i = 0; i < n; i++) {
473 if (charset[i].name.
len != name->
len) {
522 ctx->
table = charsets[source_charset].
tables[charset];
538 return ngx_http_next_header_filter(r);
552 if (ctx == NULL || ctx->
table == NULL) {
553 return ngx_http_next_body_filter(r, in);
561 for (cl = in; cl; cl = cl->
next) {
580 *ll = ngx_http_charset_recode_to_utf8(r->
pool, b, ctx);
583 *ll = ngx_http_charset_recode_from_utf8(r->
pool, b, ctx);
595 rc = ngx_http_next_body_filter(r, out);
598 if (ctx->
busy == NULL) {
639 for (cl = in; cl; cl = cl->
next) {
640 (void) ngx_http_charset_recode(cl->
buf, ctx->
table);
643 return ngx_http_next_body_filter(r, in);
648 ngx_http_charset_recode(
ngx_buf_t *b, u_char *table)
654 for (p = b->
pos; p < last; p++) {
656 if (*p != table[*p]) {
666 if (*p != table[*p]) {
685 u_char c, *p, *src, *dst, *saved, **table;
695 for ( ; src < buf->
last; src++) {
701 len = src - buf->
pos;
704 out = ngx_http_charset_get_buf(pool, ctx);
722 size = buf->
last - src;
727 if (n == 0xfffffffe) {
740 size = len + buf->
last - src;
748 cl = ngx_http_charset_get_buffer(pool, ctx, size);
780 "http charset utf saved: %z", ctx->
saved_len);
787 if (p == buf->
last) {
798 table = (u_char **) ctx->
table;
805 }
else if (n == 0xfffffffe) {
810 out = ngx_http_charset_get_buf(pool, ctx);
835 cl = ngx_http_charset_get_buffer(pool, ctx, size);
848 }
else if (n == 0xfffffffe) {
852 "http charset invalid utf 0");
856 }
else if (n > 0x10ffff) {
860 "http charset invalid utf 1");
873 table = (u_char **) ctx->
table;
875 while (src < buf->last) {
882 cl = ngx_http_charset_get_buffer(pool, ctx, size);
899 len = buf->
last - src;
921 if (n == 0xfffffffe) {
939 "http charset invalid utf 2");
966 u_char *p, *src, *dst, *table;
972 for (src = buf->
pos; src < buf->last; src++) {
997 len = src - buf->
pos;
1000 out = ngx_http_charset_get_buf(pool, ctx);
1018 size = buf->
last - src;
1019 size = size / 2 + size / 2 * ctx->
length;
1024 size = buf->
last - src;
1025 size = len + size / 2 + size / 2 * ctx->
length;
1030 cl = ngx_http_charset_get_buffer(pool, ctx, size);
1047 while (src < buf->last) {
1052 if ((
size_t) (b->
end - dst) < len) {
1055 size = buf->
last - src;
1056 size = len + size / 2 + size / 2 * ctx->
length;
1058 cl = ngx_http_charset_get_buffer(pool, ctx, size);
1110 if (cl->
buf == NULL) {
1135 if ((
size_t) (b->
end - b->
start) >= size) {
1153 if (cl->
buf == NULL) {
1172 u_char *p, *dst2src, **pp;
1183 src = ngx_http_add_charset(&mcf->
charsets, &value[1]);
1188 dst = ngx_http_add_charset(&mcf->
charsets, &value[2]);
1195 "\"charset_map\" between the same charsets "
1196 "\"%V\" and \"%V\"", &value[1], &value[2]);
1202 if ((src == table->
src && dst == table->
dst)
1203 || (src == table->
dst && dst == table->
src))
1206 "duplicate \"charset_map\" between "
1207 "\"%V\" and \"%V\"", &value[1], &value[2]);
1213 if (table == NULL) {
1232 if (dst2src == NULL) {
1236 pp = (u_char **) &table->
dst2src[0];
1239 for (i = 0; i < 128; i++) {
1243 dst2src[
i] = (u_char) i;
1246 for (; i < 256; i++) {
1263 for (i = 0; i < 128; i++) {
1268 for (; i < 256; i++) {
1282 cf->
handler = ngx_http_charset_map;
1305 u_char *p, *dst2src, **pp;
1320 src =
ngx_hextoi(value[0].data, value[0].len);
1323 "invalid value \"%V\"", &value[0]);
1333 *p++ = (u_char) (value[1].len / 2);
1335 for (i = 0; i < value[1].
len; i += 2) {
1339 "invalid value \"%V\"", &value[1]);
1343 *p++ = (u_char) dst;
1357 "invalid value \"%V\"", &value[1]);
1361 pp = (u_char **) &table->
dst2src[0];
1363 dst2src = pp[n >> 8];
1365 if (dst2src == NULL) {
1367 if (dst2src == NULL) {
1371 pp[n >> 8] = dst2src;
1374 dst2src[n & 0xff] = (u_char) src;
1377 dst =
ngx_hextoi(value[1].data, value[1].len);
1380 "invalid value \"%V\"", &value[1]);
1384 table->
src2dst[src] = (u_char) dst;
1385 table->
dst2src[dst] = (u_char) src;
1404 return "is duplicate";
1417 if (value[1].data[0] ==
'$') {
1418 var.
len = value[1].
len - 1;
1433 ngx_http_charset_filter_module);
1435 *cp = ngx_http_add_charset(&mcf->
charsets, &value[1]);
1451 for (i = 0; i < charsets->
nelts; i++) {
1461 if (i < charsets->nelts) {
1486 ngx_http_charset_create_main_conf(
ngx_conf_t *cf)
1501 if (ngx_array_init(&mcf->
tables, cf->
pool, 1,
1520 ngx_http_charset_create_loc_conf(
ngx_conf_t *cf)
1545 ngx_http_charset_merge_loc_conf(
ngx_conf_t *cf,
void *parent,
void *child)
1556 ngx_http_charset_default_types)
1581 ngx_http_charset_filter_module);
1592 if (recode == NULL) {
1604 ngx_http_charset_postconfiguration(
ngx_conf_t *cf)
1606 u_char **src, **dst;
1615 ngx_http_charset_filter_module);
1627 if (c == tables[t].src && recode[i].dst == tables[t].dst) {
1631 if (c == tables[t].dst && recode[i].dst == tables[t].src) {
1637 "no \"charset_map\" between the charsets \"%V\" and \"%V\"",
1670 src[tables[t].
dst] = tables[t].src2dst;
1671 dst[tables[t].src] = tables[t].dst2src;