mahoro.git  about / heads / tags
Ruby interface to libmagic
blob b687e218c2330a583fa7578e4acbd7e8eacf1287 14437 bytes (raw)
$ git show HEAD:ext/mahoro/mahoro.c	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
 
/*
 * This file is Public Domain.
 *
 * Note: The current maintainer (Eric Wong) respects and preserves the
 * original coding style of the original author (Shu-yu Guo) in case
 * he ever chooses to return to this project.
 */

#include <ruby.h>
#include <magic.h>
#include "nogvl_compat.h"

/*
 * Fallbacks for Ruby headers that do not provide RSTRING_LEN and
 * RSTRING_PTR: read the length and data pointer straight out of the
 * RString structure.  Each expansion is fully parenthesized so that it
 * can be used inside larger expressions.
 */
#ifndef RSTRING_LEN
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
#ifndef RSTRING_PTR
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

/* Mahoro::Error class; set in Init_mahoro and raised when a libmagic call fails */
static VALUE eMahoroError;
/* interned "to_path"; set in Init_mahoro and used by mahoro_file */
static ID id_to_path;

/*
 * Argument bundle passed (by address) through NOGVL() to the nogvl_*
 * callbacks, which only receive a single void pointer.
 */
struct nogvl_args {
	magic_t cookie; /* libmagic handle to operate on */
	union {
		const char *path; /* file or database path; NULL selects the default database */
		int fd; /* not read by any function in this file */
	} as;
};

/* :nodoc: called automatically by GC */
static void
mahoro_free(ptr)
	void *ptr;
{
	if (ptr)
		magic_close((magic_t)ptr);
}

/* :nodoc: called automatically on Mahoro#initialize */
static VALUE
mahoro_allocate(klass)
	VALUE klass;
{
	return Data_Wrap_Struct(klass, 0, mahoro_free, 0);
}

/*
 * NOGVL() callback: calls magic_load() with the cookie and path found in
 * the struct nogvl_args that +ptr+ points to.
 * Returns NULL when magic_load() returns zero, and +ptr+ itself (a
 * non-NULL value) otherwise, so callers treat a true result as failure.
 */
static void *
nogvl_load(ptr)
	void *ptr;
{
	struct nogvl_args *args = ptr;
	int rc = magic_load(args->cookie, args->as.path);

	if (rc == 0)
		return NULL;

	return ptr;
}

/*
 * call-seq:
 *	Mahoro.new(flags = Mahoro::NONE, path = nil)	->	mahoro_obj
 *
 * Create and initialize a new Mahoro object.
 * +flags+ may be one or more of any combination of the Mahoro:: constants
 * supported by Mahoro#flags=.
 * +path+ (if not nil) is a colon-separated list of database files, see
 * Mahoro#load.
 *
 * If +path+ is not given (or nil), the default database is used.
 * Consult your libmagic(3) documentation for the location of that file
 * as it varies by installation.
 */
static VALUE
mahoro_initialize(argc, argv, self)
	int argc;
	VALUE *argv, self;
{
	int flags = MAGIC_NONE;
	int failed;
	struct nogvl_args args;
	VALUE vpath, vflags;

	args.as.path = NULL;

	/* rb_scan_args leaves omitted optional arguments set to nil */
	rb_scan_args(argc, argv, "02", &vflags, &vpath);

	/*
	 * Convert flags before taking the path pointer: NUM2INT may run
	 * Ruby code (#to_int) and raises TypeError for non-numeric input
	 * instead of silently producing garbage flags.  nil keeps the
	 * documented default of Mahoro::NONE.
	 */
	if(!NIL_P(vflags)) {
		flags = NUM2INT(vflags);
	}
	if(!NIL_P(vpath)) {
		args.as.path = StringValueCStr(vpath);
	}

	if(!(args.cookie = magic_open(flags))) {
		rb_raise(eMahoroError, "failed to initialize magic cookie");
	}

	/*
	 * Give the cookie to self before loading so that mahoro_free closes
	 * it if the load below raises.  A cookie left over from an earlier
	 * #initialize call is closed first so it is not leaked.
	 */
	mahoro_free(DATA_PTR(self));
	DATA_PTR(self) = args.cookie;

	failed = NOGVL(nogvl_load, &args, RUBY_UBF_IO, NULL) ? 1 : 0;

#ifdef RB_GC_GUARD
	/*
	 * args.as.path points into vpath; NOGVL() (nogvl_compat.h) is
	 * expected to release the GVL, so keep vpath visible to the GC
	 * until libmagic is done with its bytes.
	 */
	RB_GC_GUARD(vpath);
#endif

	if(failed) {
		rb_raise(eMahoroError, "failed to load database: %s",
				      magic_error(args.cookie));
	}

	return self;
}

/*
 * NOGVL() callback: calls magic_file() with the cookie and path found in
 * the struct nogvl_args that +ptr+ points to and hands back whatever
 * magic_file() returned (the caller treats NULL as a failed lookup).
 */
static void *
nogvl_file(ptr)
	void *ptr;
{
	struct nogvl_args *args = ptr;
	const char *description = magic_file(args->cookie, args->as.path);

	return (void *)description;
}

/*
 * call-seq:
 *	mahoro_obj.file(filename)	->	String
 *
 * Returns a textual description of the contents of the +filename+ given.
 * Use Mahoro#buffer instead of this method if the contents of your
 * file are already in memory.
 * Raises Mahoro::Error on failed lookups.
 */
static VALUE
mahoro_file(self, path)
	VALUE self, path;
{
	const char *msg;
	struct nogvl_args args;

	args.cookie = (magic_t)DATA_PTR(self);

	/* Pathname objects may be transformed via #to_path */
	if (rb_respond_to(path, id_to_path))
		path = rb_funcall(path, id_to_path, 0);

	args.as.path = StringValueCStr(path);

	msg = NOGVL(nogvl_file, &args, RUBY_UBF_IO, NULL);

#ifdef RB_GC_GUARD
	/*
	 * +path+ may now be a String created by #to_path (or #to_str) that
	 * nothing else references.  NOGVL() (nogvl_compat.h) is expected to
	 * release the GVL, so keep +path+ visible to the GC until libmagic
	 * has finished reading its bytes.
	 */
	RB_GC_GUARD(path);
#endif

	if(!msg) {
		rb_raise(eMahoroError, "failed lookup: %s",
			magic_error(args.cookie));
	}

	/* msg belongs to the cookie; rb_str_new2 copies it into a Ruby String */
	return rb_str_new2(msg);
}

/*
 * call-seq:
 *	mahoro_obj.buffer(buffer)	->	String
 *
 * Returns a textual description of the contents of the +buffer+ given.
 * +buffer+ should be a String object.
 * Use Mahoro#file instead of this method if the contents are not already
 * in memory (and are possibly too large to fit into memory).
 * Raises Mahoro::Error on failed lookups.
 */
static VALUE
mahoro_buffer(self, input)
	VALUE self, input;
{
	const char *msg;
	magic_t cookie = (magic_t)DATA_PTR(self);

	StringValue(input);

	if(!(msg = magic_buffer(cookie, RSTRING_PTR(input),
	                                RSTRING_LEN(input)))) {
		rb_raise(eMahoroError, "failed lookup: %s", magic_error(cookie));
	}

	return rb_str_new2(msg);
}

/*
 * call-seq:
 *	mahoro_obj.flags = flags
 *
 * Change the behavior of an already-initialized Mahoro object.  The
 * behavior of a Mahoro object is specified at load time, but may be
 * changed after-the-fact.
 * +flags+ is a bitwise (OR) mask of one or more of the following constants
 * in the Mahoro namespace:
 *
 * - APPLE
 * - CHECK
 * - COMPRESS
 * - CONTINUE
 * - DEBUG
 * - DEVICES
 * - ERROR
 * - MIME
 * - MIME_ENCODING
 * - MIME_TYPE
 * - NONE
 * - NO_CHECK_APPTYPE
 * - NO_CHECK_COMPRESS
 * - NO_CHECK_ELF
 * - NO_CHECK_ENCODING
 * - NO_CHECK_SOFT
 * - NO_CHECK_TAR
 * - NO_CHECK_TEXT
 * - NO_CHECK_TOKENS
 * - PRESERVE_ATIME
 * - RAW
 * - SYMLINK
 */
static VALUE
mahoro_set_flags(self, flags)
	VALUE self, flags;
{
	magic_t cookie = (magic_t)DATA_PTR(self);
	/*
	 * NUM2INT type-checks its argument (TypeError/RangeError for values
	 * that are not usable as a C int); FIX2INT would reinterpret any
	 * non-Fixnum object as a number and set arbitrary flags.
	 */
	int new_flags = NUM2INT(flags);

	/* the integer result of magic_setflags() is returned as-is */
	return INT2FIX(magic_setflags(cookie, new_flags));
}

/*
 * NOGVL() callback: calls magic_check() with the cookie and path found
 * in the struct nogvl_args that +ptr+ points to.
 * Returns NULL when magic_check() returns zero, and +ptr+ itself (a
 * non-NULL value) otherwise.
 */
static void *
nogvl_check(ptr)
	void *ptr;
{
	struct nogvl_args *args = ptr;
	int rc = magic_check(args->cookie, args->as.path);

	if (rc == 0)
		return NULL;

	return ptr;
}

/*
 * call-seq:
 *	mahoro_obj.valid?(path = nil)	->	true or false
 *
 * This is used to check the validity of entries in the colon separated
 * database files passed in as +path+.  If +path+ is not passed (or nil),
 * this will check the default database.
 */
static VALUE
mahoro_check(argc, argv, self)
	int argc;
	VALUE *argv, self;
{
	struct nogvl_args args;
	VALUE vpath;
	int ok;

	args.cookie = (magic_t)DATA_PTR(self);
	args.as.path = NULL;

	/* rb_scan_args leaves vpath as nil when no argument was given */
	rb_scan_args(argc, argv, "01", &vpath);
	if(!NIL_P(vpath)) {
		args.as.path = StringValueCStr(vpath);
	}

	/* nogvl_check yields NULL when magic_check() returned zero */
	ok = !NOGVL(nogvl_check, &args, RUBY_UBF_IO, NULL);

#ifdef RB_GC_GUARD
	/*
	 * args.as.path points into vpath; NOGVL() (nogvl_compat.h) is
	 * expected to release the GVL, so keep vpath visible to the GC
	 * until libmagic is done with its bytes.
	 */
	RB_GC_GUARD(vpath);
#endif

	return ok ? Qtrue : Qfalse;
}

/*
 * NOGVL() callback: calls magic_compile() with the cookie and path found
 * in the struct nogvl_args that +ptr+ points to.
 * Returns NULL when magic_compile() returns zero, and +ptr+ itself (a
 * non-NULL value) otherwise, so callers treat a true result as failure.
 */
static void *
nogvl_compile(ptr)
	void *ptr;
{
	struct nogvl_args *args = ptr;
	int rc = magic_compile(args->cookie, args->as.path);

	if (rc == 0)
		return NULL;

	return ptr;
}

/*
 * call-seq:
 *	mahoro_obj.compile(path)	->	true
 *
 * Compile the colon-separated list of database files passed in as +path+.
 * It returns true on success and raises Mahoro::Error on failure.
 * Compiled files created are named from the +File.basename+ of each file
 * argument with ".mgc" appended to it.
 *
 * There is no need to use this function if you are using the default magic(5)
 * database on your operating system.  This is only needed if you require
 * additional magic not in the default magic database.
 *
 * Users of this method are likely to need Mahoro#load (and vice-versa).
 */
static VALUE
mahoro_compile(self, path)
	VALUE self, path;
{
	struct nogvl_args args;
	int failed;

	args.cookie = (magic_t)DATA_PTR(self);
	args.as.path = StringValueCStr(path);

	/* nogvl_compile yields a non-NULL value when magic_compile() fails */
	failed = NOGVL(nogvl_compile, &args, RUBY_UBF_IO, NULL) ? 1 : 0;

#ifdef RB_GC_GUARD
	/*
	 * StringValueCStr may have replaced +path+ with a freshly converted
	 * String (via #to_str).  NOGVL() (nogvl_compat.h) is expected to
	 * release the GVL, so keep +path+ visible to the GC until libmagic
	 * is done with its bytes.
	 */
	RB_GC_GUARD(path);
#endif

	if(failed) {
		rb_raise(eMahoroError, "failed compile: %s",
			magic_error(args.cookie));
	}

	return Qtrue;
}

/*
 * call-seq:
 *	Mahoro.compile(path)	->	true
 *
 * This is a wrapper around the Mahoro#compile instance method, but does not
 * require an existing Mahoro object.  Use the instance method unless you only
 * need to test the validity of a magic(5) database.
 */
static VALUE
mahoro_s_compile(klass, path)
	VALUE klass, path;
{
	/* temporary instance created with no arguments, i.e. default flags */
	VALUE m = rb_funcall(klass, rb_intern("new"), 0);
	VALUE result = mahoro_compile(m, path);

#ifdef RB_GC_GUARD
	/*
	 * +m+ is referenced only from this frame.  Without the guard the
	 * call above can become a tail call, and a GC run while the compile
	 * is in progress could finalize +m+ and close the cookie that
	 * libmagic is still using.
	 */
	RB_GC_GUARD(m);
#endif

	return result;
}

/*
 * call-seq:
 *	mahoro_obj.load(path)	->	mahoro_obj
 *
 * Used to load the colon-separated list of database files (+path+).
 * The ".mgc" suffix will be added to each filename where appropriate.
 * This will raise Mahoro::Error on failure.
 *
 * There is no need to use this function if you are using the default magic(5)
 * database on your operating system.  This is only needed if you require
 * additional magic not in the default magic database.
 *
 * The default database file is named by the MAGIC environment variable.
 * Consult your libmagic installation documentation for the location of
 * your default database file name.
 *
 * Users of this method are likely to need Mahoro#compile (and vice-versa).
 */
static VALUE
mahoro_load(self, path)
	VALUE self, path;
{
	struct nogvl_args args;
	int failed;

	args.cookie = (magic_t)DATA_PTR(self);
	args.as.path = StringValueCStr(path);

	/* nogvl_load yields a non-NULL value when magic_load() fails */
	failed = NOGVL(nogvl_load, &args, RUBY_UBF_IO, NULL) ? 1 : 0;

#ifdef RB_GC_GUARD
	/*
	 * StringValueCStr may have replaced +path+ with a freshly converted
	 * String (via #to_str).  NOGVL() (nogvl_compat.h) is expected to
	 * release the GVL, so keep +path+ visible to the GC until libmagic
	 * is done with its bytes.
	 */
	RB_GC_GUARD(path);
#endif

	if(failed) {
		rb_raise(eMahoroError, "failed load: %s",
			magic_error(args.cookie));
	}

	return self;
}

/*
 * Extension entry point (Ruby calls Init_<name> when it loads a C
 * extension).  Defines the Mahoro class and Mahoro::Error, exports the
 * libmagic MAGIC_* flags as Mahoro constants (optional ones only when
 * magic.h defines them), and registers the allocator and the methods
 * implemented in this file.
 */
void Init_mahoro(void)
{
	VALUE cMahoro;
	/*
	 * Mahoro is a simple interface to libmagic.
	 *
	 * Common use cases:
	 *
	 *	# initialize a Mahoro object for reading MIME types
	 *	mahoro_obj = Mahoro.new(Mahoro::MIME)
	 *
	 *	# get the MIME type of a file on disk
	 *	# This is ideal for large files which you do not need to
	 *	# read in their entirety.
	 *	mahoro_obj.file('/path/to/file.c') -> 'text/x-c'
	 *
	 *	# get the MIME type of a string buffer
	 *	# This is only ideal if you already have the buffer in
	 *	# memory or intend to process it soon
	 *	str = File.read('/path/to/file.c')
	 *	mahoro_obj.buffer(str) -> 'text/x-c'
	 *
	 *	# switch the Mahoro object to return an ASCII description
	 *	mahoro_obj.flags = Mahoro::NONE
	 *
	 *	# Similar to the above example, but the Mahoro object
	 *	# now returns a textual description
	 *	mahoro_obj.file('/path/to/file.c') -> 'ASCII C program text'
	 *
	 *	# Similar to the above example, but the Mahoro object
	 *	# now returns a textual description
	 *	str = File.read('/path/to/file.c')
	 *	mahoro_obj.buffer(str) -> 'ASCII C program text'
	 *
	 * Mahoro is not thread-safe by default, see Mahoro::ThreadSafe for
	 * making this module thread-safe.
	 *
	 * More information about libmagic:
	 * https://en.wikipedia.org/wiki/Libmagic
	 *
	 * Source code is available via git:
	 *	git clone https://yhbt.net/mahoro.git
	 *
	 * And viewable with a web browser via cgit:
	 *	https://yhbt.net/mahoro.git
	 *
	 * Eric Wong is the current maintainer of Mahoro.
	 * Plain-text comments, questions, bug reports, patches and
	 * pull requests are all highly welcome on the public inbox:
	 * mahoro@public-inbox.org
	 *
	 * Please generate patches using the "git format-patch" command.
	 * Use of "git send-email" to send a patch is recommended.
	 * For reviewed patches, you may also send a pull request formatted
	 * using the "git request-pull" command.
	 *
	 * Do not expect Eric to read HTML email under any circumstances.
	 */
	cMahoro      = rb_define_class("Mahoro", rb_cObject);
	eMahoroError = rb_define_class_under(cMahoro, "Error", rb_eStandardError);

	/* No special handling, the default */
	rb_define_const(cMahoro, "NONE", INT2FIX(MAGIC_NONE));

	/* print debugging messages to stderr */
	rb_define_const(cMahoro, "DEBUG", INT2FIX(MAGIC_DEBUG));

	/* Follow symlinks */
	rb_define_const(cMahoro, "SYMLINK", INT2FIX(MAGIC_SYMLINK));

	/* Check inside compressed files */
	rb_define_const(cMahoro, "COMPRESS", INT2FIX(MAGIC_COMPRESS));

	/* Look at the contents of devices */
	rb_define_const(cMahoro, "DEVICES", INT2FIX(MAGIC_DEVICES));

#ifdef MAGIC_MIME_TYPE
	/*
	 * Return only the MIME type
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "MIME_TYPE", INT2FIX(MAGIC_MIME_TYPE));
#endif

	/* Return all matches */
	rb_define_const(cMahoro, "CONTINUE", INT2FIX(MAGIC_CONTINUE));

	/*
	 * Check the magic database for consistency and
	 * print warnings to stderr
	 */
	rb_define_const(cMahoro, "CHECK", INT2FIX(MAGIC_CHECK));

	/* preserve access time of files analyzed */
	rb_define_const(cMahoro, "PRESERVE_ATIME",
	                      INT2FIX(MAGIC_PRESERVE_ATIME));

	/*
	 * Don't translate unprintable characters to a \\ooo octal
	 * representation
	 */
	rb_define_const(cMahoro, "RAW", INT2FIX(MAGIC_RAW));

	/*
	 * Treat operating system errors while trying to open files
	 * and follow symlinks as real errors, instead of printing
	 * them in the magic buffer.
	 */
	rb_define_const(cMahoro, "ERROR", INT2FIX(MAGIC_ERROR));

#ifdef MAGIC_MIME_ENCODING
	/*
	 * Return a MIME encoding, instead of a textual description.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "MIME_ENCODING", INT2FIX(MAGIC_MIME_ENCODING));
#endif

	/* return both MIME type and encoding */
	rb_define_const(cMahoro, "MIME", INT2FIX(MAGIC_MIME));

#ifdef MAGIC_APPLE
	/*
	 * Return both Apple creator and type.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "APPLE", INT2FIX(MAGIC_APPLE));
#endif

#ifdef MAGIC_NO_CHECK_COMPRESS
	/*
	 * Don't check for or inside compressed files.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_COMPRESS",
		        INT2FIX(MAGIC_NO_CHECK_COMPRESS));
#endif

#ifdef MAGIC_NO_CHECK_TAR
	/*
	 * Don't examine tar files.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_TAR", INT2FIX(MAGIC_NO_CHECK_TAR));
#endif

#ifdef MAGIC_NO_CHECK_SOFT
	/*
	 * Don't consult magic files.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_SOFT", INT2FIX(MAGIC_NO_CHECK_SOFT));
#endif

#ifdef MAGIC_NO_CHECK_APPTYPE
	/*
	 * Don't check application type (EMX only).
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_APPTYPE",
	                INT2FIX(MAGIC_NO_CHECK_APPTYPE));
#endif

#ifdef MAGIC_NO_CHECK_ELF
	/*
	 * Don't check for ELF details.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_ELF", INT2FIX(MAGIC_NO_CHECK_ELF));
#endif

#ifdef MAGIC_NO_CHECK_ASCII
	/*
	 * Don't check for various types of ASCII text files.
	 * This constant may not be defined on older systems.
	 * (Exported as NO_CHECK_TEXT, taken from MAGIC_NO_CHECK_ASCII.)
	 */
	rb_define_const(cMahoro, "NO_CHECK_TEXT",
	                INT2FIX(MAGIC_NO_CHECK_ASCII));
#endif

#ifdef MAGIC_NO_CHECK_TOKENS
	/*
	 * Don't check for known tokens inside ASCII files.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_TOKENS",
	                INT2FIX(MAGIC_NO_CHECK_TOKENS));
#endif

#ifdef MAGIC_NO_CHECK_ENCODING
	/*
	 * Don't check for text encodings.
	 * This constant may not be defined on older systems.
	 */
	rb_define_const(cMahoro, "NO_CHECK_ENCODING",
	                INT2FIX(MAGIC_NO_CHECK_ENCODING));
#endif

	/*
	 * Allocator and method table.  An arity of -1 means the C function
	 * takes (argc, argv, self); Mahoro#valid? is implemented by
	 * mahoro_check.
	 */

	rb_define_alloc_func(cMahoro, mahoro_allocate);
	rb_define_method(cMahoro, "initialize", mahoro_initialize, -1);
	rb_define_method(cMahoro, "file", mahoro_file, 1);
	rb_define_method(cMahoro, "buffer", mahoro_buffer, 1);
	rb_define_method(cMahoro, "flags=", mahoro_set_flags, 1);
	rb_define_method(cMahoro, "valid?", mahoro_check, -1);
	rb_define_singleton_method(cMahoro, "compile", mahoro_s_compile, 1);
	rb_define_method(cMahoro, "compile", mahoro_compile, 1);
	rb_define_method(cMahoro, "load", mahoro_load, 1);
	/* interned once here; mahoro_file uses it to call #to_path */
	id_to_path = rb_intern("to_path");
}

/* arch-tag: mahoro */

git clone https://yhbt.net/mahoro.git