Sunday, June 21, 2015

Parse food recipes from Python with pylangparser

Link to project page

from pylangparser import *

# --- Quantity symbols -------------------------------------------------------
# A plain run of digits, e.g. "3".
QTY = Symbols(r'\d+')
# "1/2", optionally preceded by a whole number and a space, e.g. "1 1/2".
QTY_HALF = Symbols(r'(\d+ ){0,1}1/2')
# QTY_HALF is listed ahead of QTY.
QTYS = QTY_HALF & QTY

# --- Ingredient symbols -----------------------------------------------------
ONION = Symbols(r'onion')
ROAST = Symbols(r'roast')
SOY_SAUCE = Symbols(r'soy sauce')
STALK_CELERY = Symbols(r'stalk celery')
WATER = Symbols(r'water')
YOGURT = Symbols(r'yogurt')
# A percentage such as "2%".
FAT = Symbols(r'\d+%')

# Every known ingredient, combined into one symbol group.
INGREDIENT = (
    ONION
    & ROAST
    & SOY_SAUCE
    & STALK_CELERY
    & WATER
    & YOGURT
)

# Any purely alphabetic word.
RANDOM_WORD = Symbols(r'[a-zA-Z]+')
# --- Unit-of-measure symbols ------------------------------------------------
LBS = Symbols(r'lbs')
TABLESPOON = Symbols(r'tablespoon')
TABLESPOONS = Symbols(r'tablespoons')
TEASPOON = Symbols(r'teaspoon')
TEASPOONS = Symbols(r'teaspoons')
QUART = Symbols(r'quart')
QUARTS = Symbols(r'quarts')
DASH = Symbols(r'dash')
DASHES = Symbols(r'dashes')
CUP = Symbols(r'cup')
CUPS = Symbols(r'cups')

# Every known unit of measure, combined into one symbol group.
METRICS = (
    LBS
    & TABLESPOON
    & TABLESPOONS
    & TEASPOON
    & TEASPOONS
    & QUART
    & QUARTS
    & DASH
    & DASHES
    & CUP
    & CUPS
)

# Runs of spaces, newlines, commas, periods and hyphens carry no meaning.
IGNORE_CHARS = Ignore(r'[ \n,.-]+')
IGNORES = IGNORE_CHARS

# order is important, first token that matches will be considered
TOKENS = FAT & QTYS & METRICS & IGNORES & INGREDIENT & RANDOM_WORD
# Sample input: one ingredient per fragment.  The fragments are joined by
# implicit string concatenation; the separating whitespace matches the
# IGNORE_CHARS pattern.
food_recipe = (
    " 3 lbs chuck roast"
    " 1 quart water"
    " 1 1/2 quarts water"
    " 1 onion, chopped"
    " 1 stalk celery, chopped"
    " 2 tablespoons soy sauce"
    " 1/2 cup 2% Greek yogurt "
)

# One alternative per known ingredient symbol.
product = (
    SymbolsParser(ONION)
    | SymbolsParser(ROAST)
    | SymbolsParser(SOY_SAUCE)
    | SymbolsParser(STALK_CELERY)
    | SymbolsParser(WATER)
    | SymbolsParser(YOGURT)
)

# One alternative per unit symbol listed in METRICS.  TEASPOON(S) and
# DASH(ES) are included so that every unit the lexer can emit has a matching
# grammar alternative; without them a recipe using those units would
# presumably fail to be fully consumed by the parser.
measure = (
    SymbolsParser(CUP)
    | SymbolsParser(CUPS)
    | SymbolsParser(DASH)
    | SymbolsParser(DASHES)
    | SymbolsParser(LBS)
    | SymbolsParser(QUART)
    | SymbolsParser(QUARTS)
    | SymbolsParser(TABLESPOON)
    | SymbolsParser(TABLESPOONS)
    | SymbolsParser(TEASPOON)
    | SymbolsParser(TEASPOONS)
)

# A quantity is either a (possibly mixed) half or a plain integer.
qty = SymbolsParser(QTY_HALF) | SymbolsParser(QTY)

fat = SymbolsParser(FAT)

# An ingredient line: a quantity, then any number of units, then a run of
# fat levels / known products / other words.  Other words (e.g. "chuck",
# "chopped") are wrapped in IgnoreResult -- presumably so they are consumed
# but left out of the AST.
ingredient = (
    qty
    << ZeroOrMore(measure)
    << Repeat(fat | product | IgnoreResult(SymbolsParser(RANDOM_WORD)))
)

# A recipe is any number of ingredient lines.
recipe = ZeroOrMore(ingredient)
# extract tokens
lexer = Lexer(TOKENS)
tokens = lexer.parseTokens(food_recipe, False)
print(tokens)

# generating AST
parser = AllTokensConsumed(recipe)
ast = parser(tokens, 0)
print("\nast:")
ast.pretty_print()

# Each entry pairs a sub-parser with the line printed when a node was
# produced by it.  The checks are independent and run in this order.
_FIELD_FORMATS = (
    (qty, "qty: %s"),
    (measure, "measure: %s"),
    (product, "product: %s"),
    (fat, "fat level: %s"),
)

for node in ast:
    # Only report nodes produced by the `ingredient` rule.
    if not node.check_parser(ingredient):
        continue
    print("\ningredient:")
    for child in node:
        for field_parser, fmt in _FIELD_FORMATS:
            if child.check_parser(field_parser):
                print(fmt % child.get_token())
output:
-------
[(3, \d+), (lbs, lbs), (chuck, [a-zA-Z]+), (roast, roast), (1, \d+), (quart, quart), (water, water), (1 1/2, (\d+ ){0,1}1/2), (quarts, quarts), (water, water), (1, \d+), (onion, onion), (chopped, [a-zA-Z]+), (1, \d+), (stalk celery, stalk celery), (chopped, [a-zA-Z]+), (2, \d+), (tablespoons, tablespoons), (soy sauce, soy sauce), (1/2, (\d+ ){0,1}1/2), (cup, cup), (2%, \d+%), (Greek, [a-zA-Z]+), (yogurt, yogurt)]

ast:
[[['3'], ['lbs'], ['roast']], [['1'], ['quart'], ['water']], [['1 1/2'], ['quarts'], ['water']], [['1'], ['onion']], [['1'], ['stalk celery']], [['2'], ['tablespoons'], ['soy sauce']], [['1/2'], ['cup'], ['2%'], ['yogurt']]]

ingredient:
qty: 3
measure: lbs
product: roast

ingredient:
qty: 1
measure: quart
product: water

ingredient:
qty: 1 1/2
measure: quarts
product: water

ingredient:
qty: 1
product: onion

ingredient:
qty: 1
product: stalk celery

ingredient:
qty: 2
measure: tablespoons
product: soy sauce

ingredient:
qty: 1/2
measure: cup
fat level: 2%
product: yogurt

No comments:

Post a Comment