src/interfaces/ecpg/preproc/parse.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764

#!/usr/bin/perl
# src/interfaces/ecpg/preproc/parse.pl
# parser generator for ecpg
#
# See README.parser for some explanation of what this does.
#
# Command-line options:
#   --srcdir: where to find ecpg-provided input files (default ".")
#   --parser: the backend gram.y file to read (required, no default)
#   --output: where to write preproc.y (required, no default)
#
# Copyright (c) 2007-2025, PostgreSQL Global Development Group
#
# Written by Mike Aubury <mike.aubury@aubit.com>
#            Michael Meskes <meskes@postgresql.org>
#            Andy Colson <andy@squeakycode.net>
#
# Placed under the same license as PostgreSQL.
#

use strict;
use warnings FATAL => 'all';
use Getopt::Long;

my $srcdir = '.';
my $outfile = '';
my $parser = '';

GetOptions(
	'srcdir=s' => \$srcdir,
	'output=s' => \$outfile,
	'parser=s' => \$parser,) or die "wrong arguments";


# These hash tables define additional transformations to apply to
# grammar rules.  For bug-detection purposes, we count usages of
# each hash table entry in a second hash table, and verify that
# all the entries get used.

# Substitutions to apply to tokens whenever they are seen in a rule.
my %replace_token = (
	'BCONST' => 'ecpg_bconst',
	'FCONST' => 'ecpg_fconst',
	'Sconst' => 'ecpg_sconst',
	'XCONST' => 'ecpg_xconst',
	'IDENT' => 'ecpg_ident',
	'PARAM' => 'ecpg_param',);

my %replace_token_used;

# This hash can provide a result type to override "void" for nonterminals
# that need that, or it can specify 'ignore' to cause us to skip the rule
# for that nonterminal.  (In either case, ecpg.trailer had better provide
# a substitute rule, since the default won't do.)
my %replace_types = (
	'PrepareStmt' => '<prep>',
	'ExecuteStmt' => '<exec>',
	'opt_array_bounds' => '<index>',

	# "ignore" means: do not create type and rules for this nonterminal
	'parse_toplevel' => 'ignore',
	'stmtmulti' => 'ignore',
	'CreateAsStmt' => 'ignore',
	'DeallocateStmt' => 'ignore',
	'ColId' => 'ignore',
	'type_function_name' => 'ignore',
	'ColLabel' => 'ignore',
	'Sconst' => 'ignore',
	'opt_distinct_clause' => 'ignore',
	'PLpgSQL_Expr' => 'ignore',
	'PLAssignStmt' => 'ignore',
	'plassign_target' => 'ignore',
	'plassign_equals' => 'ignore',);

my %replace_types_used;

# This hash provides an "ignore" option or substitute expansion for any
# rule or rule alternative.  The hash key is the same "tokenlist" tag
# used for lookup in ecpg.addons.
my %replace_line = (
	# These entries excise certain keywords from the core keyword lists.
	# Be sure to account for these in ColLabel and related productions.
	'unreserved_keyword CONNECTION' => 'ignore',
	'unreserved_keyword CURRENT_P' => 'ignore',
	'unreserved_keyword DAY_P' => 'ignore',
	'unreserved_keyword HOUR_P' => 'ignore',
	'unreserved_keyword INPUT_P' => 'ignore',
	'unreserved_keyword MINUTE_P' => 'ignore',
	'unreserved_keyword MONTH_P' => 'ignore',
	'unreserved_keyword SECOND_P' => 'ignore',
	'unreserved_keyword YEAR_P' => 'ignore',
	'col_name_keyword CHAR_P' => 'ignore',
	'col_name_keyword INT_P' => 'ignore',
	'col_name_keyword VALUES' => 'ignore',
	'reserved_keyword TO' => 'ignore',
	'reserved_keyword UNION' => 'ignore',

	# some other production rules have to be ignored or replaced
	'fetch_args FORWARD opt_from_in cursor_name' => 'ignore',
	'fetch_args BACKWARD opt_from_in cursor_name' => 'ignore',
	"opt_array_bounds opt_array_bounds '[' Iconst ']'" => 'ignore',
	'VariableShowStmt SHOW var_name' => 'SHOW var_name ecpg_into',
	'VariableShowStmt SHOW TIME ZONE' => 'SHOW TIME ZONE ecpg_into',
	'VariableShowStmt SHOW TRANSACTION ISOLATION LEVEL' =>
	  'SHOW TRANSACTION ISOLATION LEVEL ecpg_into',
	'VariableShowStmt SHOW SESSION AUTHORIZATION' =>
	  'SHOW SESSION AUTHORIZATION ecpg_into',
	'returning_clause RETURNING returning_with_clause target_list' =>
	  'RETURNING returning_with_clause target_list opt_ecpg_into',
	'ExecuteStmt EXECUTE name execute_param_clause' =>
	  'EXECUTE prepared_name execute_param_clause execute_rest',
	'ExecuteStmt CREATE OptTemp TABLE create_as_target AS EXECUTE name execute_param_clause opt_with_data'
	  => 'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause opt_with_data execute_rest',
	'ExecuteStmt CREATE OptTemp TABLE IF_P NOT EXISTS create_as_target AS EXECUTE name execute_param_clause opt_with_data'
	  => 'CREATE OptTemp TABLE IF_P NOT EXISTS create_as_target AS EXECUTE prepared_name execute_param_clause opt_with_data execute_rest',
	'PrepareStmt PREPARE name prep_type_clause AS PreparableStmt' =>
	  'PREPARE prepared_name prep_type_clause AS PreparableStmt',
	'var_name ColId' => 'ECPGColId');

my %replace_line_used;


# Declare assorted state variables.

# yaccmode counts the '%%' separator lines we have seen, so that we can
# distinguish prologue, rules, and epilogue sections of gram.y.
my $yaccmode = 0;
# in /* ... */ comment?
my $comment = 0;
# in { ... } braced text?
my $brace_indent = 0;
# within a rule (production)?
my $in_rule = 0;
# count of alternatives processed within the current rule.
my $alt_count = 0;
# copymode = 1 when we want to emit the current rule to preproc.y.
# If it's 0, we have decided to ignore the current rule, and should
# skip all output until we get to the ending semicolon.
my $copymode = 0;
# tokenmode = 1 indicates we are processing %token and following declarations.
my $tokenmode = 0;
# stmt_mode = 1 indicates that we are processing the 'stmt:' rule.
my $stmt_mode = 0;
# Hacky state for emitting feature-not-supported warnings.
my $has_feature_not_supported = 0;
my $has_if_command = 0;

# %addons holds the rules loaded from ecpg.addons.
my %addons;

# %buff holds various named "buffers", which are just strings that accumulate
# the output destined for different sections of the preproc.y file.  This
# allows us to process the input in one pass even though the resulting output
# needs to appear in various places.  See dump_buffer calls below for the
# set of buffer names and the order in which they'll be dumped.
my %buff;

# %tokens contains an entry for every name we have discovered to be a token.
my %tokens;

# $non_term_id is the name of the nonterminal that is the target of the
# current rule.
my $non_term_id;

# $line holds the reconstructed rule text (that is, RHS token list) that
# we plan to emit for the current rule.
my $line = '';

# count of tokens included in $line.
my $line_count = 0;


# Open parser / output file early, to raise errors early.
open(my $parserfh, '<', $parser) or die "could not open parser file $parser";
open(my $outfh, '>', $outfile) or die "could not open output file $outfile";

# Read the various ecpg-supplied input files.
# ecpg.addons is loaded into the %addons hash, while the other files
# are just copied into buffers for verbatim output later.
preload_addons();
include_file('header', 'ecpg.header');
include_file('tokens', 'ecpg.tokens');
include_file('ecpgtype', 'ecpg.type');
include_file('trailer', 'ecpg.trailer');

# Read gram.y, and do the bulk of the processing.
main();

# Emit data from the various buffers we filled.
dump_buffer('header');
dump_buffer('tokens');
dump_buffer('types');
dump_buffer('ecpgtype');
dump_buffer('orig_tokens');
print $outfh '%%', "\n";
print $outfh 'prog: statements;', "\n";
dump_buffer('rules');
dump_buffer('trailer');

close($parserfh);

# Cross-check that we don't have dead or ambiguous addon rules.
foreach (keys %addons)
{
	die "addon rule $_ was never used\n" if $addons{$_}{used} == 0;
	die "addon rule $_ was matched multiple times\n" if $addons{$_}{used} > 1;
}

# Likewise cross-check that entries in our internal hash tables match something.
foreach (keys %replace_token)
{
	die "replace_token entry $_ was never used\n"
	  if !defined($replace_token_used{$_});
	# multiple use of a replace_token entry is fine
}

foreach (keys %replace_types)
{
	die "replace_types entry $_ was never used\n"
	  if !defined($replace_types_used{$_});
	die "replace_types entry $_ was matched multiple times\n"
	  if $replace_types_used{$_} > 1;
}

foreach (keys %replace_line)
{
	die "replace_line entry $_ was never used\n"
	  if !defined($replace_line_used{$_});
	die "replace_line entry $_ was matched multiple times\n"
	  if $replace_line_used{$_} > 1;
}


# Read the backend grammar.
sub main
{
  line: while (<$parserfh>)
	{
		chomp;

		if (/^%%/)
		{
			# New file section, so advance yaccmode.
			$yaccmode++;
			# We are no longer examining %token and related commands.
			$tokenmode = 0;
			# Shouldn't be anything else on the line.
			next line;
		}

		# Hacky check for rules that throw FEATURE_NOT_SUPPORTED
		# (do this before $_ has a chance to get clobbered)
		if ($yaccmode == 1)
		{
			$has_feature_not_supported = 1 if /ERRCODE_FEATURE_NOT_SUPPORTED/;
			$has_if_command = 1 if /^\s*if/;
		}

		# Make sure any braces are split into separate fields
		s/{/ { /g;
		s/}/ } /g;

		# Likewise for comment start/end markers
		s|\/\*| /* |g;
		s|\*\/| */ |g;

		# Now split the line into individual fields
		my @arr = split(' ');

		# Ignore empty lines
		if (!@arr)
		{
			next line;
		}

		# Once we have seen %token in the prologue, we assume all that follows
		# up to the '%%' separator is %token and associativity declarations.
		# Collect and process that as necessary.
		if ($arr[0] eq '%token' && $yaccmode == 0)
		{
			$tokenmode = 1;
		}

		if ($tokenmode == 1)
		{
			# Collect everything of interest on this line into $str.
			my $str = '';
			for my $a (@arr)
			{
				# Skip comments.
				if ($a eq '/*')
				{
					$comment++;
					next;
				}
				if ($a eq '*/')
				{
					$comment--;
					next;
				}
				if ($comment)
				{
					next;
				}

				# If it's "<something>", it's a type in a %token declaration,
				# which we should just drop so that the tokens have void type.
				if (substr($a, 0, 1) eq '<')
				{
					next;
				}

				# Remember that this is a token.  This will also make entries
				# for "%token" and the associativity keywords such as "%left",
				# which should be harmless so it's not worth the trouble to
				# avoid it.  If a token appears both in %token and in an
				# associativity declaration, we'll redundantly re-set its
				# entry, which is also OK.
				$tokens{$a} = 1;

				# Accumulate the line in $str.
				$str = $str . ' ' . $a;

				# Give our token CSTRING the same precedence as IDENT.
				if ($a eq 'IDENT' && $arr[0] eq '%nonassoc')
				{
					$str = $str . " CSTRING";
				}
			}
			# Save the lightly-processed line in orig_tokens.
			add_to_buffer('orig_tokens', $str);
			next line;
		}

		# The rest is only appropriate if we're in the rules section of gram.y
		if ($yaccmode != 1)
		{
			next line;
		}

		# Go through each word of the rule in turn
		for (
			my $fieldIndexer = 0;
			$fieldIndexer < scalar(@arr);
			$fieldIndexer++)
		{
			# Detect and ignore comments and braced action text
			if ($arr[$fieldIndexer] eq '*/' && $comment)
			{
				$comment = 0;
				next;
			}
			elsif ($comment)
			{
				next;
			}
			elsif ($arr[$fieldIndexer] eq '/*')
			{
				# start of a possibly-multiline comment
				$comment = 1;
				next;
			}
			elsif ($arr[$fieldIndexer] eq '}')
			{
				$brace_indent--;
				next;
			}
			elsif ($arr[$fieldIndexer] eq '{')
			{
				$brace_indent++;
				next;
			}
			if ($brace_indent > 0)
			{
				next;
			}

			# OK, it's not a comment or part of an action.
			# Check for ';' ending the current rule, or '|' ending the
			# current alternative.
			if ($arr[$fieldIndexer] eq ';')
			{
				if ($copymode)
				{
					# Print the accumulated rule.
					emit_rule();
					add_to_buffer('rules', ";\n\n");
				}
				else
				{
					# End of an ignored rule; revert to copymode = 1.
					$copymode = 1;
				}

				# Reset for the next rule.
				$line = '';
				$line_count = 0;
				$in_rule = 0;
				$alt_count = 0;
				$has_feature_not_supported = 0;
				$has_if_command = 0;
				next;
			}

			if ($arr[$fieldIndexer] eq '|')
			{
				if ($copymode)
				{
					# Print the accumulated alternative.
					# Increment $alt_count for each non-ignored alternative.
					$alt_count += emit_rule();
				}

				# Reset for the next alternative.
				# Start the next line with '|' if we've printed at least one
				# alternative.
				if ($alt_count > 1)
				{
					$line = '| ';
				}
				else
				{
					$line = '';
				}
				$line_count = 0;
				$has_feature_not_supported = 0;
				$has_if_command = 0;
				next;
			}

			# Apply replace_token substitution if we have one.
			if (exists $replace_token{ $arr[$fieldIndexer] })
			{
				$replace_token_used{ $arr[$fieldIndexer] }++;
				$arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] };
			}

			# Are we looking at a declaration of a non-terminal?
			# We detect that by seeing ':' on the end of the token or
			# as the next token.
			if (($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:$/)
				|| (   $fieldIndexer + 1 < scalar(@arr)
					&& $arr[ $fieldIndexer + 1 ] eq ':'))
			{
				# Extract the non-terminal, sans : if any
				$non_term_id = $arr[$fieldIndexer];
				$non_term_id =~ tr/://d;

				# Consume the ':' if it's separate
				if (!($arr[$fieldIndexer] =~ /[A-Za-z0-9]+:$/))
				{
					$fieldIndexer++;
				}

				# Check for %replace_types entry indicating to ignore it.
				if (defined $replace_types{$non_term_id}
					&& $replace_types{$non_term_id} eq 'ignore')
				{
					# We'll ignore this nonterminal and rule altogether.
					$replace_types_used{$non_term_id}++;
					$copymode = 0;
					next line;
				}

				# OK, we want this rule.
				$copymode = 1;

				# Set special mode for the "stmt:" rule.
				if ($non_term_id eq 'stmt')
				{
					$stmt_mode = 1;
				}
				else
				{
					$stmt_mode = 0;
				}

				# Emit appropriate %type declaration for this nonterminal,
				# if it has a type; otherwise omit that.
				if (defined $replace_types{$non_term_id})
				{
					my $tstr =
						'%type '
					  . $replace_types{$non_term_id} . ' '
					  . $non_term_id;
					add_to_buffer('types', $tstr);
					$replace_types_used{$non_term_id}++;
				}

				# Emit the target part of the rule.
				# Note: the leading space is just to match
				# the rather weird pre-v18 output logic.
				my $tstr = ' ' . $non_term_id . ':';
				add_to_buffer('rules', $tstr);

				# Prepare for reading the tokens of the rule.
				$line = '';
				$line_count = 0;
				die "unterminated rule at grammar line $.\n"
				  if $in_rule;
				$in_rule = 1;
				$alt_count = 1;
				next;
			}
			elsif ($copymode)
			{
				# Not a nonterminal declaration, so just add it to $line.
				$line = $line . ' ' . $arr[$fieldIndexer];
				$line_count++;
			}
		}
	}
	die "unterminated rule at end of grammar\n"
	  if $in_rule;
	return;
}


# append a file onto a buffer.
# Arguments:  buffer_name, filename (without path)
sub include_file
{
	my ($buffer, $filename) = @_;
	my $full = "$srcdir/$filename";
	open(my $fh, '<', $full) or die;
	while (<$fh>)
	{
		chomp;
		add_to_buffer($buffer, $_);
	}
	close($fh);
	return;
}

# Emit the semantic action for the current rule.
# This function mainly accounts for any modifications specified
# by an ecpg.addons entry.
sub emit_rule_action
{
	my ($tag) = @_;

	# See if we have an addons entry; if not, just emit default action
	my $rec = $addons{$tag};
	if (!$rec)
	{
		emit_default_action(0);
		return;
	}

	# Track addons entry usage for later cross-check
	$rec->{used}++;

	my $rectype = $rec->{type};
	if ($rectype eq 'rule')
	{
		# Emit default action and then the code block.
		emit_default_action(0);
	}
	elsif ($rectype eq 'addon')
	{
		# Emit the code block wrapped in the same braces as the default action.
		add_to_buffer('rules', ' { ');
	}

	# Emit the addons entry's code block.
	# We have an array to add to the buffer, we'll add it directly instead of
	# calling add_to_buffer, which does not know about arrays.
	push(@{ $buff{'rules'} }, @{ $rec->{lines} });

	if ($rectype eq 'addon')
	{
		emit_default_action(1);
	}
	return;
}

# Add the given line to the specified buffer.
#   Pass:  buffer_name, string_to_append
# Note we add a newline automatically.
sub add_to_buffer
{
	push(@{ $buff{ $_[0] } }, "$_[1]\n");
	return;
}

# Dump the specified buffer to the output file.
sub dump_buffer
{
	my ($buffer) = @_;
	# Label the output for debugging purposes.
	print $outfh '/* ', $buffer, ' */', "\n";
	my $ref = $buff{$buffer};
	print $outfh @$ref;
	return;
}

# Emit the default action (usually token concatenation) for the current rule.
#   Pass: brace_printed boolean
# brace_printed should be true if caller already printed action's open brace.
sub emit_default_action
{
	my ($brace_printed) = @_;

	if ($stmt_mode == 0)
	{
		# Normal rule
		if ($has_feature_not_supported and not $has_if_command)
		{
			# The backend unconditionally reports
			# FEATURE_NOT_SUPPORTED in this rule, so let's emit
			# a warning on the ecpg side.
			if (!$brace_printed)
			{
				add_to_buffer('rules', ' { ');
				$brace_printed = 1;
			}
			add_to_buffer('rules',
				'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");'
			);
		}

		add_to_buffer('rules', '}') if ($brace_printed);
	}
	else
	{
		# We're in the "stmt:" rule, where we need to output special actions.
		# This code assumes that no ecpg.addons entry applies.
		if ($line_count)
		{
			# Any regular kind of statement calls output_statement
			add_to_buffer('rules',
				' { output_statement(@1, 0, ECPGst_normal); }');
		}
		else
		{
			# The empty production for stmt: do nothing
		}
	}
	return;
}

# Print the accumulated rule text (in $line) and the appropriate action.
# Ordinarily return 1.  However, if the rule matches an "ignore"
# entry in %replace_line, then do nothing and return 0.
sub emit_rule
{
	# compute tag to be used as lookup key in %replace_line and %addons
	my $tag = $non_term_id . ' ' . $line;
	$tag =~ tr/|//d;
	$tag = join(' ', split(/\s+/, $tag));

	# apply replace_line substitution if any
	my $rep = $replace_line{$tag};
	if (defined $rep)
	{
		$replace_line_used{$tag}++;

		if ($rep eq 'ignore')
		{
			return 0;
		}

		# non-ignore entries replace the line, but we'd better keep any '|';
		# we don't bother to update $line_count here.
		if (index($line, '|') != -1)
		{
			$line = '| ' . $rep;
		}
		else
		{
			$line = $rep;
		}

		# recompute tag for use in emit_rule_action
		$tag = $non_term_id . ' ' . $line;
		$tag =~ tr/|//d;
		$tag = join(' ', split(/\s+/, $tag));
	}

	# Emit $line, then print the appropriate action.
	add_to_buffer('rules', $line);
	emit_rule_action($tag);
	return 1;
}

=top
	load ecpg.addons into %addons hash.  The result is something like
	%addons = {
		'stmt ClosePortalStmt' => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ], 'used' => 0 },
		'stmt ViewStmt' => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ], 'used' => 0 }
	}

=cut

sub preload_addons
{
	my $filename = $srcdir . "/ecpg.addons";
	open(my $fh, '<', $filename) or die;

	# There may be multiple "ECPG:" lines and then multiple lines of code.
	# The block of code needs to be added to each of the consecutively-
	# preceding "ECPG:" records.
	my (@needsRules, @code);

	# there may be comments before the first "ECPG:" line, skip them
	my $skip = 1;
	while (<$fh>)
	{
		if (/^ECPG:\s+(\w+)\s+(.*)$/)
		{
			# Found an "ECPG:" line, so we're done skipping the header
			$skip = 0;
			my $type = $1;
			my $target = $2;
			# Normalize target so there's exactly one space between tokens
			$target = join(' ', split(/\s+/, $target));
			# Validate record type and target
			die "invalid record type $type in addon rule for $target\n"
			  unless ($type eq 'block'
				or $type eq 'addon'
				or $type eq 'rule');
			die "duplicate addon rule for $target\n"
			  if (exists $addons{$target});
			# If we had some preceding code lines, attach them to all
			# as-yet-unfinished records.
			if (@code)
			{
				for my $x (@needsRules)
				{
					push(@{ $x->{lines} }, @code);
				}
				@code = ();
				@needsRules = ();
			}
			my $record = {};
			$record->{type} = $type;
			$record->{lines} = [];
			$record->{used} = 0;
			$addons{$target} = $record;
			push(@needsRules, $record);
		}
		elsif (/^ECPG:/)
		{
			# Complain if preceding regex failed to match
			die "incorrect syntax in ECPG line: $_\n";
		}
		else
		{
			# Non-ECPG line: add to @code unless we're still skipping
			next if $skip;
			push(@code, $_);
		}
	}
	close($fh);
	# Deal with final code block
	if (@code)
	{
		for my $x (@needsRules)
		{
			push(@{ $x->{lines} }, @code);
		}
	}
	return;
}