PostgreSQL Source Code git master
psqlscan_int.h
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * psqlscan_int.h
4 * lexical scanner internal declarations
5 *
6 * This file declares the PsqlScanStateData structure used by psqlscan.l
7 * and shared by other lexers compatible with it, such as psqlscanslash.l.
8 *
9 * One difficult aspect of this code is that we need to work in multibyte
10 * encodings that are not ASCII-safe. A "safe" encoding is one in which each
11 * byte of a multibyte character has the high bit set (it's >= 0x80). Since
12 * all our lexing rules treat all high-bit-set characters alike, we don't
13 * really need to care whether such a byte is part of a sequence or not.
14 * In an "unsafe" encoding, we still expect the first byte of a multibyte
15 * sequence to be >= 0x80, but later bytes might not be. If we scan such
16 * a sequence as-is, the lexing rules could easily be fooled into matching
17 * such bytes to ordinary ASCII characters. Our solution for this is to
18 * substitute 0xFF for each non-first byte within the data presented to flex.
19 * The flex rules will then pass the FF's through unmolested. The
20 * psqlscan_emit() subroutine is responsible for looking back to the original
21 * string and replacing FF's with the corresponding original bytes.
22 *
23 * Another interesting thing we do here is scan different parts of the same
24 * input with physically separate flex lexers (ie, lexers written in separate
25 * .l files). We can get away with this because the only part of the
26 * persistent state of a flex lexer that depends on its parsing rule tables
27 * is the start state number, which is easy enough to manage --- usually,
28 * in fact, we just need to set it to INITIAL when changing lexers. But to
29 * make that work at all, we must use re-entrant lexers, so that all the
30 * relevant state is in the yyscan_t attached to the PsqlScanState;
31 * if we were using lexers with separate static state we would soon end up
32 * with dangling buffer pointers in one or the other. Also note that this
33 * is unlikely to work very nicely if the lexers aren't all built with the
34 * same flex version, or if they don't use the same flex options.
35 *
36 *
37 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
38 * Portions Copyright (c) 1994, Regents of the University of California
39 *
40 * src/include/fe_utils/psqlscan_int.h
41 *
42 *-------------------------------------------------------------------------
43 */
44#ifndef PSQLSCAN_INT_H
45#define PSQLSCAN_INT_H
46
47#include "fe_utils/psqlscan.h"
48
49/*
50 * These are just to allow this file to be compilable standalone for header
51 * validity checking; in actual use, this file should always be included
52 * from the body of a flex file, where these symbols are already defined.
53 */
54typedef struct yy_buffer_state *YY_BUFFER_STATE; /* flex input control structure (opaque here) */
55typedef void *yyscan_t; /* opaque handle to flex's scanner state */
56
57/*
58 * We use a stack of flex buffers to handle substitution of psql variables.
59 * Each stacked buffer contains the as-yet-unread text from one psql variable.
60 * When we pop the stack all the way, we resume reading from the outer buffer
61 * identified by scanbufhandle.
62 */
63typedef struct StackElem
64{
65 YY_BUFFER_STATE buf; /* flex input control structure */
66 char *bufstring; /* data actually being scanned by flex */
67 char *origstring; /* copy of original data, if needed */
68 char *varname; /* name of variable providing data, or NULL */
69 struct StackElem *next; /* next entry in the buffer stack */
70} StackElem;
71
72/*
73 * All working state of the lexer must be stored in PsqlScanStateData
74 * between calls. This allows us to have multiple open lexer operations,
75 * which is needed for nested include files. The lexer itself is not
76 * recursive, but it must be re-entrant.
77 */
78typedef struct PsqlScanStateData
79{
80 yyscan_t scanner; /* Flex's state for this PsqlScanState */
81
82 PQExpBuffer output_buf; /* current output buffer */
83
84 StackElem *buffer_stack; /* stack of variable expansion buffers */
85
86 /*
87 * These variables always refer to the outer buffer, never to any stacked
88 * variable-expansion buffer.
89 */
90 YY_BUFFER_STATE scanbufhandle; /* flex handle identifying the outer buffer */
91 char *scanbuf; /* start of outer-level input buffer */
92 const char *scanline; /* current input line at outer level */
93
94 /* safe_encoding, curline, refline are used by emit() to replace FFs */
95 int encoding; /* encoding being used now */
96 bool safe_encoding; /* is current encoding "safe"? */
97 bool std_strings; /* are string literals standard? */
98 const char *curline; /* actual flex input string for cur buf */
99 const char *refline; /* original data for cur buffer */
100
101 /* status for psql_scan_get_location() */
102 int cur_line_no; /* current line#, or 0 if no yylex done */
103 const char *cur_line_ptr; /* points into cur_line_no'th line in scanbuf */
104
105 /*
106 * All this state lives across successive input lines, until explicitly
107 * reset by psql_scan_reset. start_state is adopted by yylex() on entry,
108 * and updated with its finishing state on exit.
109 */
110 int start_state; /* yylex's starting/finishing state */
111 int state_before_str_stop; /* start cond. before end quote */
112 int paren_depth; /* depth of nesting in parentheses */
113 int xcdepth; /* depth of nesting in slash-star comments */
114 char *dolqstart; /* current $foo$ quote start string */
115
116 /*
117 * State to track boundaries of BEGIN ... END blocks in function
118 * definitions, so that semicolons do not send query too early.
119 */
120 int identifier_count; /* identifiers since start of statement */
121 char identifiers[4]; /* records the first few identifiers */
122 int begin_depth; /* depth of begin/end pairs */
123
124 /*
125 * Callback functions provided by the program making use of the lexer,
126 * plus a void* callback passthrough argument.
127 */
128 const PsqlScanCallbacks *callbacks;
129 void *cb_passthrough;
130} PsqlScanStateData;
131
132
133/*
134 * Functions exported by psqlscan.l, but only meant for use within
135 * compatible lexers.
136 */
137extern void psqlscan_push_new_buffer(PsqlScanState state,
138 const char *newstr, const char *varname);
139extern void psqlscan_pop_buffer_stack(PsqlScanState state);
140extern void psqlscan_select_top_buffer(PsqlScanState state);
141extern bool psqlscan_var_is_current_source(PsqlScanState state,
142 const char *varname);
143extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
144 const char *txt, int len,
145 char **txtcopy);
146extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
147extern char *psqlscan_extract_substring(PsqlScanState state,
148 const char *txt, int len);
149extern void psqlscan_escape_variable(PsqlScanState state,
150 const char *txt, int len,
151 PsqlScanQuoteType quote);
152extern void psqlscan_test_variable(PsqlScanState state,
153 const char *txt, int len);
154
155#endif /* PSQLSCAN_INT_H */
void * yyscan_t
Definition: cubedata.h:65
const void size_t len
PsqlScanQuoteType
Definition: psqlscan.h:53
void * yyscan_t
Definition: psqlscan_int.h:55
void psqlscan_escape_variable(PsqlScanState state, const char *txt, int len, PsqlScanQuoteType quote)
Definition: psqlscan.l:1585
struct StackElem StackElem
YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len, char **txtcopy)
Definition: psqlscan.l:1476
void psqlscan_select_top_buffer(PsqlScanState state)
Definition: psqlscan.l:1431
struct PsqlScanStateData PsqlScanStateData
void psqlscan_emit(PsqlScanState state, const char *txt, int len)
Definition: psqlscan.l:1517
struct yy_buffer_state * YY_BUFFER_STATE
Definition: psqlscan_int.h:54
char * psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
Definition: psqlscan.l:1549
void psqlscan_push_new_buffer(PsqlScanState state, const char *newstr, const char *varname)
Definition: psqlscan.l:1374
void psqlscan_pop_buffer_stack(PsqlScanState state)
Definition: psqlscan.l:1413
void psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
Definition: psqlscan.l:1614
bool psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
Definition: psqlscan.l:1454
const char * refline
Definition: psqlscan_int.h:99
StackElem * buffer_stack
Definition: psqlscan_int.h:84
const char * scanline
Definition: psqlscan_int.h:92
PQExpBuffer output_buf
Definition: psqlscan_int.h:82
const PsqlScanCallbacks * callbacks
Definition: psqlscan_int.h:128
const char * cur_line_ptr
Definition: psqlscan_int.h:103
YY_BUFFER_STATE scanbufhandle
Definition: psqlscan_int.h:90
const char * curline
Definition: psqlscan_int.h:98
char * origstring
Definition: psqlscan_int.h:67
char * varname
Definition: psqlscan_int.h:68
YY_BUFFER_STATE buf
Definition: psqlscan_int.h:65
char * bufstring
Definition: psqlscan_int.h:66
struct StackElem * next
Definition: psqlscan_int.h:69
Definition: regguts.h:323