Skip to contents

This vignette demonstrates querying food consumption expenditure data across all available NSS/HCES rounds using the consumptionsurveyindia package. The data covers 18 survey rounds from 1993 to 2024, with over 50 million household-item observations.

Setup

library(consumptionsurveyindia)
library(dplyr)
library(ggplot2)

# Open a connection to the harmonised consumption data.
consumption_dir <- "~/data/consumption_parquet"
con <- ics_connect(consumption_dir)

Food Group Expenditure Shares Over Time

How has the composition of Indian food expenditure changed across survey rounds? The stacked bar chart below shows the share of total food spending allocated to the eight largest of the 13 food groups, with the remaining groups combined into "Other".

# Spending shares per survey round and food group. The columns used below are
# nss_round, round_year, food_group and spending_share.
shares <- ics_expenditure_shares(con)

# Simplify: rank food groups by their average share across rounds and keep
# the eight largest; everything else is collapsed into "Other" below.
top_groups <- shares |>
  group_by(food_group) |>
  summarise(
    avg_share = mean(spending_share, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(desc(avg_share)) |>
  head(8) |>
  pull(food_group)

shares_plot <- shares |>
  mutate(
    # as.character() guards against food_group being a factor: base ifelse()
    # would then return the integer level codes instead of the labels.
    food_group = if_else(
      food_group %in% top_groups,
      as.character(food_group),
      "Other"
    )
  ) |>
  group_by(nss_round, round_year, food_group) |>
  summarise(
    # na.rm = TRUE matches the NA handling used for the ranking above, so a
    # single missing share does not blank out a whole "Other" segment.
    spending_share = sum(spending_share, na.rm = TRUE),
    .groups = "drop"
  )

# Stacked bars: one bar per survey year, one segment per food group.
shares_plot |>
  ggplot(
    aes(
      x = factor(round_year),
      y = spending_share,
      fill = food_group
    )
  ) +
  geom_col(position = "stack", width = 0.7) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_brewer(palette = "Set3") +
  labs(
    title = "Food Expenditure Composition by Survey Round",
    subtitle = "Share of total food spending by food group",
    x = "Survey year",
    y = "Expenditure share",
    fill = "Food group"
  ) +
  theme_minimal(base_size = 11) +
  # Slant the year labels so they do not overlap; keep the legend at the side.
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right"
  )

A key indicator of dietary transition is the shift from cereal-dominated diets toward more diverse food groups including animal-source foods.

# Food groups whose trajectories are compared in the line chart.
transition_groups <- c(
  "Cereals",
  "Egg, Fish & Meat",
  "Milk & Milk Products",
  "Edible Oil",
  "Vegetables"
)

cereals_meat <- ics_consumption_by_food_group(
  con,
  food_group = transition_groups
)

# One line per food group, tracing its expenditure share over survey years.
cereals_meat |>
  ggplot(
    aes(
      x = round_year,
      y = spending_share,
      colour = food_group
    )
  ) +
  geom_line(linewidth = 1) +
  geom_point(size = 2.5) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "Dietary Transition: Key Food Groups Over Time",
    subtitle = "Share of total food expenditure",
    x = "Survey year",
    y = "Expenditure share",
    colour = "Food group"
  ) +
  theme_minimal(base_size = 11)

State-Level Consumption Patterns (HCES 2022-23)

The latest HCES 2022-23 round covers 36 states and union territories with 13.7 million item records. Here we show mean food expenditure for the 20 highest-spending states and union territories.

state_exp <- ics_consumption_by_state(
  con,
  round = "HCES-2022-23",
  min_obs = 100
)

if (nrow(state_exp) > 0) {
  # Keep the 20 named states with the highest mean expenditure.
  state_exp_top <- state_exp |>
    filter(!is.na(state_name)) |>
    arrange(desc(mean_value)) |>
    head(20)

  # Horizontal bars, ordered by mean expenditure.
  ggplot(
    state_exp_top,
    aes(x = reorder(state_name, mean_value), y = mean_value)
  ) +
    geom_col(fill = "#2171B5") +
    coord_flip() +
    labs(
      title = "Mean Food Expenditure by State (HCES 2022-23)",
      subtitle = "Survey-weighted mean per item record (Rs.)",
      x = NULL,
      y = "Mean expenditure (Rs.)"
    ) +
    theme_minimal(base_size = 11)
}

State-Level by Food Group

Which states spend more on cereals vs. milk vs. meat?

# State-level totals for HCES 2022-23, broken down by food group.
state_fg <- ics_consumption_by_state(
  con,
  round = "HCES-2022-23",
  by_food_group = TRUE,
  min_obs = 100
)

if (nrow(state_fg) > 0) {
  # Top 5 food groups by total spending
  top_fg <- state_fg |>
    group_by(food_group) |>
    summarise(total = sum(total_value, na.rm = TRUE), .groups = "drop") |>
    arrange(desc(total)) |>
    head(5) |>
    pull(food_group)

  # Rows without a state name cannot be plotted. Dropping them before the
  # ranking keeps an NA group from occupying one of the 15 slots, which would
  # leave fewer than 15 states in the chart.
  named_fg <- state_fg |>
    filter(!is.na(state_name))

  # The 15 states with the largest sample size (summed over food groups).
  largest_states <- named_fg |>
    group_by(state_name) |>
    summarise(n = sum(n), .groups = "drop") |>
    arrange(desc(n)) |>
    head(15) |>
    pull(state_name)

  named_fg |>
    filter(food_group %in% top_fg, state_name %in% largest_states) |>
    group_by(state_name) |>
    # Shares are relative to the state's spending on the top 5 food groups
    # only; na.rm = TRUE matches the NA handling used for top_fg above.
    mutate(share = total_value / sum(total_value, na.rm = TRUE)) |>
    ungroup() |>
    ggplot(aes(x = state_name, y = share, fill = food_group)) +
    geom_col(position = "stack") +
    coord_flip() +
    scale_y_continuous(labels = scales::percent_format()) +
    scale_fill_brewer(palette = "Set2") +
    labs(
      title = "Food Group Composition by State (HCES 2022-23)",
      subtitle = "Top 15 states by sample size, top 5 food groups",
      x = NULL,
      y = "Expenditure share",
      fill = "Food group"
    ) +
    theme_minimal(base_size = 10)
}

Item-Level Queries: Rice, Chicken, Milk

You can query any specific food item across all years, not just food groups. The package searches both item codes and item names across rounds.

# What items are available?
# Each call prints a tibble of matching items with the columns item_code,
# item_name, category, section and recall_days. Judging by the printed
# results, the match is a case-insensitive substring match (e.g. "rice" also
# finds "Chira (flattened rice)") -- NOTE(review): confirm against the
# ics_search_items() documentation.
ics_search_items("rice")
#> # A tibble: 7 × 5
#>   item_code item_name                  category              section recall_days
#>   <chr>     <chr>                      <chr>                   <dbl>       <int>
#> 1 061       Rice (free/PMGKAY)         Cereals                   5.1          30
#> 2 101       Rice (PDS)                 Cereals                   5.1          30
#> 3 102       Rice (other sources)       Cereals                   5.1          30
#> 4 103       Chira (flattened rice)     Cereals                   5.1          30
#> 5 105       Muri (puffed rice)         Cereals                   5.1          30
#> 6 106       Other rice products        Cereals                   5.1          30
#> 7 015       Noodles (cup/rice noodles) Packaged Processed F…     7.2           7
# A narrow term can resolve to a single item.
ics_search_items("chicken")
#> # A tibble: 1 × 5
#>   item_code item_name category         section recall_days
#>   <chr>     <chr>     <chr>              <dbl>       <int>
#> 1 195       Chicken   Egg, Fish & Meat     6.5           7
# A broad term can match items in more than one category (see "Oilseeds").
ics_search_items("oil")
#> # A tibble: 10 × 5
#>    item_code item_name                              category section recall_days
#>    <chr>     <chr>                                  <chr>      <dbl>       <int>
#>  1 075       Edible oil (free/PMGKAY)               Edible …     6.6           7
#>  2 095       Edible oil: others                     Edible …     6.6           7
#>  3 181       Mustard oil                            Edible …     6.6           7
#>  4 182       Groundnut oil                          Edible …     6.6           7
#>  5 183       Coconut oil                            Edible …     6.6           7
#>  6 184       Refined oil (sunflower/soyabean/palm/… Edible …     6.6           7
#>  7 185       Other edible oil                       Edible …     6.6           7
#>  8 188       Edible oil (PDS)                       Edible …     6.6           7
#>  9 189       Edible oil: sub-total                  Edible …     6.6           7
#> 10 260       Oilseeds                               Spices       6.7           7
# Query each item of interest across every available round.
rice <- ics_query_item(con, "rice")
chicken <- ics_query_item(con, "chicken")
milk_liquid <- ics_query_item(con, "milk: liquid")
mustard_oil <- ics_query_item(con, "mustard oil")

# Stack the per-item results into a single table for plotting.
items_combined <- bind_rows(
  list(rice, chicken, milk_liquid, mustard_oil)
)

if (nrow(items_combined) > 0) {
  # One line per item, tracing mean expenditure over survey years.
  items_combined |>
    ggplot(
      aes(
        x = round_year,
        y = mean_value,
        colour = item
      )
    ) +
    geom_line(linewidth = 1) +
    geom_point(size = 2.5) +
    labs(
      title = "Expenditure Trends for Specific Food Items",
      subtitle = "Survey-weighted mean expenditure (Rs.) per item record",
      x = "Survey year",
      y = "Mean expenditure (Rs.)",
      colour = "Item"
    ) +
    theme_minimal(base_size = 11)
}

# State-level rice expenditure for the HCES 2022-23 round.
rice_by_state <- ics_query_item(
  con,
  "rice",
  round = "HCES-2022-23",
  by = "state"
)

if (nrow(rice_by_state) > 0) {
  # Named states with at least 100 records, highest mean expenditure first.
  rice_top_states <- rice_by_state |>
    filter(!is.na(state_name)) |>
    filter(n >= 100) |>
    arrange(desc(mean_value)) |>
    head(15)

  ggplot(
    rice_top_states,
    aes(x = reorder(state_name, mean_value), y = mean_value)
  ) +
    geom_col(fill = "#E69F00") +
    coord_flip() +
    labs(
      title = "Rice Expenditure by State (HCES 2022-23)",
      subtitle = "Survey-weighted mean (Rs.), top 15 states",
      x = NULL,
      y = "Mean expenditure (Rs.)"
    ) +
    theme_minimal(base_size = 11)
}

Era Comparison: Pre-2022 vs Post-2022

The HCES 2022-23 introduced a fundamentally different survey methodology (three-visit design). While direct comparison with earlier rounds requires caution, we can compare the broad patterns.

# Label each round by era, then average every food group's share within it.
era_shares <- shares |>
  mutate(
    era = if_else(
      round_year >= 2022,
      "Post-2022\n(HCES)",
      "Pre-2022\n(NSS)"
    )
  ) |>
  group_by(era, food_group) |>
  summarise(
    avg_share = mean(spending_share, na.rm = TRUE),
    .groups = "drop"
  )

if (nrow(era_shares) > 0) {
  # Fill colours keyed by era label.
  era_colours <- c(
    "Pre-2022\n(NSS)" = "#4292C6",
    "Post-2022\n(HCES)" = "#EF6548"
  )

  # Restrict the comparison to the food groups listed in top_groups.
  era_top <- era_shares |>
    filter(food_group %in% top_groups)

  ggplot(
    era_top,
    aes(x = reorder(food_group, avg_share), y = avg_share, fill = era)
  ) +
    geom_col(position = "dodge", width = 0.7) +
    coord_flip() +
    scale_y_continuous(labels = scales::percent_format()) +
    scale_fill_manual(values = era_colours) +
    labs(
      title = "Food Expenditure Shares: Pre-2022 vs Post-2022 Era",
      subtitle = "Average share across rounds within each era",
      x = NULL,
      y = "Average expenditure share",
      fill = "Era"
    ) +
    theme_minimal(base_size = 11)
}

Cleanup

Data Sources

  • NSS Consumer Expenditure Surveys, rounds 43rd (1988) through 68th (2012)
  • HCES 2022-23 and 2023-24 (Ministry of Statistics and Programme Implementation, Government of India)
  • Downloaded from microdata.gov.in