Hubbub
in_head.c
Go to the documentation of this file.
1 /*
2  * This file is part of Hubbub.
3  * Licensed under the MIT License,
4  * http://www.opensource.org/licenses/mit-license.php
5  * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
6  */
7 
8 #include <assert.h>
9 #include <string.h>
10 
11 #include <parserutils/charset/mibenum.h>
12 
13 #include "treebuilder/modes.h"
14 #include "treebuilder/internal.h"
16 
17 #include "charset/detect.h"
18 
19 #include "utils/utils.h"
20 #include "utils/string.h"
21 
22 
30  const hubbub_token *token)
31 {
32  static uint16_t utf16, utf16be, utf16le;
33  uint16_t charset_enc = 0;
34  uint16_t content_type_enc = 0;
35  size_t i;
36  hubbub_error err = HUBBUB_OK;
37 
38  err = insert_element(treebuilder, &token->data.tag, false);
39  if (err != HUBBUB_OK)
40  return err;
41 
44  if (treebuilder->tree_handler->encoding_change == NULL)
45  return err;
46 
47  /* Grab UTF-16 MIBenums */
48  if (utf16 == 0) {
49  utf16 = parserutils_charset_mibenum_from_name(
50  "utf-16", SLEN("utf-16"));
51  utf16be = parserutils_charset_mibenum_from_name(
52  "utf-16be", SLEN("utf-16be"));
53  utf16le = parserutils_charset_mibenum_from_name(
54  "utf-16le", SLEN("utf-16le"));
55  assert(utf16 != 0 && utf16be != 0 && utf16le != 0);
56  }
57 
58  for (i = 0; i < token->data.tag.n_attributes; i++) {
59  hubbub_attribute *attr = &token->data.tag.attributes[i];
60 
61  if (hubbub_string_match(attr->name.ptr, attr->name.len,
62  (const uint8_t *) "charset",
63  SLEN("charset")) == true) {
64  /* Extract charset */
65  charset_enc = parserutils_charset_mibenum_from_name(
66  (const char *) attr->value.ptr,
67  attr->value.len);
68  } else if (hubbub_string_match(attr->name.ptr, attr->name.len,
69  (const uint8_t *) "content",
70  SLEN("content")) == true) {
71  /* Extract charset from Content-Type */
72  content_type_enc = hubbub_charset_parse_content(
73  attr->value.ptr, attr->value.len);
74  }
75  }
76 
77  /* Fall back, if necessary */
78  if (charset_enc == 0 && content_type_enc != 0)
79  charset_enc = content_type_enc;
80 
81  if (charset_enc != 0) {
82  const char *name;
83 
84  hubbub_charset_fix_charset(&charset_enc);
85 
86  /* Change UTF-16 to UTF-8 */
87  if (charset_enc == utf16le || charset_enc == utf16be ||
88  charset_enc == utf16) {
89  charset_enc = parserutils_charset_mibenum_from_name(
90  "UTF-8", SLEN("UTF-8"));
91  }
92 
93  name = parserutils_charset_mibenum_to_name(charset_enc);
94 
95  err = treebuilder->tree_handler->encoding_change(
96  treebuilder->tree_handler->ctx, name);
97  }
98 
99  return err;
100 }
101 
110  const hubbub_token *token)
111 {
112  hubbub_error err = HUBBUB_OK;
113  bool handled = false;
114 
115  switch (token->type) {
117  err = process_characters_expect_whitespace(treebuilder,
118  token, true);
119  break;
121  err = process_comment_append(treebuilder, token,
122  treebuilder->context.element_stack[
123  treebuilder->context.current_node].node);
124  break;
127  break;
129  {
131  &token->data.tag.name);
132 
133  if (type == HTML) {
134  /* Process as if "in body" */
135  err = handle_in_body(treebuilder, token);
136  } else if (type == BASE || type == COMMAND || type == LINK) {
137  err = insert_element(treebuilder, &token->data.tag,
138  false);
139 
141  } else if (type == META) {
142  err = process_meta_in_head(treebuilder, token);
143  } else if (type == TITLE) {
144  err = parse_generic_rcdata(treebuilder, token, true);
145  } else if (type == NOFRAMES || type == STYLE) {
146  err = parse_generic_rcdata(treebuilder, token, false);
147  } else if (type == NOSCRIPT) {
148  if (treebuilder->context.enable_scripting) {
149  err = parse_generic_rcdata(treebuilder, token,
150  false);
151  } else {
152  err = insert_element(treebuilder,
153  &token->data.tag, true);
154  if (err != HUBBUB_OK)
155  return err;
156 
157  treebuilder->context.mode = IN_HEAD_NOSCRIPT;
158  }
159  } else if (type == SCRIPT) {
163  err = parse_generic_rcdata(treebuilder, token, false);
164  } else if (type == HEAD) {
166  } else {
167  err = HUBBUB_REPROCESS;
168  }
169  }
170  break;
172  {
174  &token->data.tag.name);
175 
176  if (type == HEAD) {
177  handled = true;
178  } else if (type == HTML || type == BODY || type == BR) {
179  err = HUBBUB_REPROCESS;
180  }
181  }
182  break;
183  case HUBBUB_TOKEN_EOF:
184  err = HUBBUB_REPROCESS;
185  break;
186  }
187 
188  if (handled || err == HUBBUB_REPROCESS) {
189  hubbub_ns ns;
190  element_type otype;
191  void *node;
192 
193  element_stack_pop(treebuilder, &ns, &otype, &node);
194 
195  treebuilder->tree_handler->unref_node(
196  treebuilder->tree_handler->ctx,
197  node);
198 
199  treebuilder->context.mode = AFTER_HEAD;
200  }
201 
202  return err;
203 }
#define SLEN(s)
Definition: utils.h:21
Definition: internal.h:20
hubbub_error process_comment_append(hubbub_treebuilder *treebuilder, const hubbub_token *token, void *parent)
Process a comment token, appending it to the given parent.
Definition: treebuilder.c:421
hubbub_token_type type
The token type.
Definition: types.h:120
Definition: internal.h:20
void * ctx
Context pointer.
Definition: tree.h:292
Token data.
Definition: types.h:119
hubbub_string name
Tag name.
Definition: types.h:110
Definition: internal.h:16
hubbub_tree_handler * tree_handler
Callback table.
Definition: internal.h:122
const uint8_t * ptr
Pointer to data.
Definition: types.h:77
element_type
Definition: internal.h:13
hubbub_error process_characters_expect_whitespace(hubbub_treebuilder *treebuilder, const hubbub_token *token, bool insert_into_current_node)
Process a character token in cases where we expect only whitespace.
Definition: treebuilder.c:375
hubbub_string value
Attribute value.
Definition: types.h:87
Definition: internal.h:17
hubbub_string name
Attribute name.
Definition: types.h:86
Definition: internal.h:25
insertion_mode mode
The current insertion mode.
Definition: internal.h:75
size_t len
Byte length of string.
Definition: types.h:78
Definition: internal.h:22
hubbub_tree_encoding_change encoding_change
Change encoding.
Definition: tree.h:290
Definition: internal.h:22
uint16_t hubbub_charset_parse_content(const uint8_t *value, uint32_t valuelen)
Parse a content= attribute's value.
Definition: detect.c:369
hubbub_error handle_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token)
Handle tokens in "in body" insertion mode.
Definition: in_body.c:123
bool enable_scripting
Whether scripting is enabled.
Definition: internal.h:95
const char * name
Definition: initial.c:22
hubbub_treebuilder_context context
Our context.
Definition: internal.h:120
hubbub_error insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag_name, bool push)
Create element and insert it into the DOM, potentially pushing it on the stack.
Definition: treebuilder.c:752
hubbub_error handle_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token)
Handle token in "in head" insertion mode.
Definition: in_head.c:109
hubbub_attribute * attributes
Array of attribute data.
Definition: types.h:112
hubbub_error
Definition: errors.h:18
void * node
Node pointer.
Definition: internal.h:54
hubbub_tree_unref_node unref_node
Unreference node.
Definition: tree.h:279
element_type type
Definition: treebuilder.c:26
No error.
Definition: errors.h:19
hubbub_tag tag
Definition: types.h:125
Tag attribute data.
Definition: types.h:84
hubbub_ns
Possible namespaces.
Definition: types.h:63
union hubbub_token::@3 data
Type-specific data.
Definition: internal.h:17
static hubbub_error process_meta_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token)
Process a meta tag as if "in head".
Definition: in_head.c:29
bool hubbub_string_match(const uint8_t *a, size_t a_len, const uint8_t *b, size_t b_len)
Check that one string is exactly equal to another.
Definition: string.c:23
hubbub_error element_stack_pop(hubbub_treebuilder *treebuilder, hubbub_ns *ns, element_type *type, void **node)
Pop an element off the stack of open elements.
Definition: treebuilder.c:1113
element_type element_type_from_name(hubbub_treebuilder *treebuilder, const hubbub_string *tag_name)
Convert an element name into an element type.
Definition: treebuilder.c:987
void hubbub_charset_fix_charset(uint16_t *charset)
Fix charsets, according to the override table in HTML5, section 8.2.2.2.
Definition: detect.c:667
Treebuilder object.
Definition: internal.h:116
element_context * element_stack
Stack of open elements.
Definition: internal.h:79
uint32_t n_attributes
Count of attributes.
Definition: types.h:111
uint32_t current_node
Index of current node in stack.
Definition: internal.h:81
Definition: internal.h:19
hubbub_error parse_generic_rcdata(hubbub_treebuilder *treebuilder, const hubbub_token *token, bool rcdata)
Trigger parsing of generic (R)CDATA.
Definition: treebuilder.c:464